blob: 2f80bf0496bb3f6256434dc2fc8b223bd716befa [file] [log] [blame]
/* _lzma - Low-level Python interface to liblzma.

   Initial implementation by Per Øyvind Karlsen.
   Rewritten by Nadeem Vawda.

*/
7
8#define PY_SSIZE_T_CLEAN
9
10#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020011#include "structmember.h" // PyMemberDef
Larry Hastingsf256c222014-01-25 21:30:37 -080012
13#include <stdarg.h>
14#include <string.h>
15
16#include <lzma.h>
17
Ma Linf9bedb62021-04-28 14:58:54 +080018// Blocks output buffer wrappers
19#include "pycore_blocks_output_buffer.h"
20
21#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
22 #error "The maximum block size accepted by liblzma is SIZE_MAX."
23#endif
24
25/* On success, return value >= 0
26 On failure, return -1 */
27static inline Py_ssize_t
Ma Lin251ffa92021-05-01 07:32:49 +080028OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
29 uint8_t **next_out, size_t *avail_out)
Ma Linf9bedb62021-04-28 14:58:54 +080030{
31 Py_ssize_t allocated;
32
33 allocated = _BlocksOutputBuffer_InitAndGrow(
34 buffer, max_length, (void**) next_out);
35 *avail_out = (size_t) allocated;
36 return allocated;
37}
38
39/* On success, return value >= 0
40 On failure, return -1 */
41static inline Py_ssize_t
Ma Lin251ffa92021-05-01 07:32:49 +080042OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
43 uint8_t **next_out, size_t *avail_out)
Ma Linf9bedb62021-04-28 14:58:54 +080044{
45 Py_ssize_t allocated;
46
47 allocated = _BlocksOutputBuffer_Grow(
48 buffer, (void**) next_out, (Py_ssize_t) *avail_out);
49 *avail_out = (size_t) allocated;
50 return allocated;
51}
52
53static inline Py_ssize_t
Ma Lin251ffa92021-05-01 07:32:49 +080054OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
Ma Linf9bedb62021-04-28 14:58:54 +080055{
56 return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
57}
58
59static inline PyObject *
Ma Lin251ffa92021-05-01 07:32:49 +080060OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
Ma Linf9bedb62021-04-28 14:58:54 +080061{
62 return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
63}
64
65static inline void
Ma Lin251ffa92021-05-01 07:32:49 +080066OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
Ma Linf9bedb62021-04-28 14:58:54 +080067{
68 _BlocksOutputBuffer_OnError(buffer);
69}
70
71
/* Take obj->lock.  A non-blocking attempt is made first; only if that
   fails is the blocking acquire performed, wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
        Py_END_ALLOW_THREADS \
    } } while (0)

/* Release a lock previously taken with ACQUIRE_LOCK(). */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Larry Hastingsf256c222014-01-25 21:30:37 -080079
Dong-hee Na1937edd2020-06-23 00:53:07 +090080typedef struct {
81 PyTypeObject *lzma_compressor_type;
82 PyTypeObject *lzma_decompressor_type;
83 PyObject *error;
84 PyObject *empty_tuple;
85} _lzma_state;
86
87static inline _lzma_state*
88get_lzma_state(PyObject *module)
89{
90 void *state = PyModule_GetState(module);
91 assert(state != NULL);
92 return (_lzma_state *)state;
93}
Larry Hastingsf256c222014-01-25 21:30:37 -080094
/* Container formats: */
enum {
    FORMAT_AUTO = 0,
    FORMAT_XZ = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW = 3,
};

/* One past the largest integrity-check ID defined by liblzma. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
104
105
106typedef struct {
107 PyObject_HEAD
108 lzma_allocator alloc;
109 lzma_stream lzs;
110 int flushed;
Larry Hastingsf256c222014-01-25 21:30:37 -0800111 PyThread_type_lock lock;
Larry Hastingsf256c222014-01-25 21:30:37 -0800112} Compressor;
113
114typedef struct {
115 PyObject_HEAD
116 lzma_allocator alloc;
117 lzma_stream lzs;
118 int check;
119 char eof;
120 PyObject *unused_data;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100121 char needs_input;
122 uint8_t *input_buffer;
123 size_t input_buffer_size;
Larry Hastingsf256c222014-01-25 21:30:37 -0800124 PyThread_type_lock lock;
Larry Hastingsf256c222014-01-25 21:30:37 -0800125} Decompressor;
126
Larry Hastingsf256c222014-01-25 21:30:37 -0800127/* Helper functions. */
128
129static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900130catch_lzma_error(_lzma_state *state, lzma_ret lzret)
Larry Hastingsf256c222014-01-25 21:30:37 -0800131{
132 switch (lzret) {
133 case LZMA_OK:
134 case LZMA_GET_CHECK:
135 case LZMA_NO_CHECK:
136 case LZMA_STREAM_END:
137 return 0;
138 case LZMA_UNSUPPORTED_CHECK:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900139 PyErr_SetString(state->error, "Unsupported integrity check");
Larry Hastingsf256c222014-01-25 21:30:37 -0800140 return 1;
141 case LZMA_MEM_ERROR:
142 PyErr_NoMemory();
143 return 1;
144 case LZMA_MEMLIMIT_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900145 PyErr_SetString(state->error, "Memory usage limit exceeded");
Larry Hastingsf256c222014-01-25 21:30:37 -0800146 return 1;
147 case LZMA_FORMAT_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900148 PyErr_SetString(state->error, "Input format not supported by decoder");
Larry Hastingsf256c222014-01-25 21:30:37 -0800149 return 1;
150 case LZMA_OPTIONS_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900151 PyErr_SetString(state->error, "Invalid or unsupported options");
Larry Hastingsf256c222014-01-25 21:30:37 -0800152 return 1;
153 case LZMA_DATA_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900154 PyErr_SetString(state->error, "Corrupt input data");
Larry Hastingsf256c222014-01-25 21:30:37 -0800155 return 1;
156 case LZMA_BUF_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900157 PyErr_SetString(state->error, "Insufficient buffer space");
Larry Hastingsf256c222014-01-25 21:30:37 -0800158 return 1;
159 case LZMA_PROG_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900160 PyErr_SetString(state->error, "Internal error");
Larry Hastingsf256c222014-01-25 21:30:37 -0800161 return 1;
162 default:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900163 PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
Larry Hastingsf256c222014-01-25 21:30:37 -0800164 return 1;
165 }
166}
167
168static void*
169PyLzma_Malloc(void *opaque, size_t items, size_t size)
170{
Dong-hee Na1937edd2020-06-23 00:53:07 +0900171 if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800172 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900173 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800174 /* PyMem_Malloc() cannot be used:
175 the GIL is not held when lzma_code() is called */
176 return PyMem_RawMalloc(items * size);
177}
178
/* Deallocation callback installed in lzma_allocator.free; releases a
   block obtained from PyLzma_Malloc().  `opaque` is unused. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    (void)opaque;
    PyMem_RawFree(ptr);
}
184
Larry Hastingsf256c222014-01-25 21:30:37 -0800185
186/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
187 since the predefined conversion specifiers do not suit our needs:
188
189 uint32_t - the "I" (unsigned int) specifier is the right size, but
190 silently ignores overflows on conversion.
191
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700192 lzma_vli - the "K" (unsigned long long) specifier is the right
Larry Hastingsf256c222014-01-25 21:30:37 -0800193 size, but like "I" it silently ignores overflows on conversion.
194
195 lzma_mode and lzma_match_finder - these are enumeration types, and
196 so the size of each is implementation-defined. Worse, different
197 enum types can be of different sizes within the same program, so
198 to be strictly correct, we need to define two separate converters.
199 */
200
201#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
202 static int \
203 FUNCNAME(PyObject *obj, void *ptr) \
204 { \
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700205 unsigned long long val; \
Larry Hastingsf256c222014-01-25 21:30:37 -0800206 \
207 val = PyLong_AsUnsignedLongLong(obj); \
208 if (PyErr_Occurred()) \
209 return 0; \
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700210 if ((unsigned long long)(TYPE)val != val) { \
Larry Hastingsf256c222014-01-25 21:30:37 -0800211 PyErr_SetString(PyExc_OverflowError, \
212 "Value too large for " #TYPE " type"); \
213 return 0; \
214 } \
215 *(TYPE *)ptr = (TYPE)val; \
216 return 1; \
217 }
218
219INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
220INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
221INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
222INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
223
224#undef INT_TYPE_CONVERTER_FUNC
225
226
/* Filter specifier parsing.

   This code handles converting filter specifiers (Python dicts) into
   the C lzma_filter structs expected by liblzma. */
231
232static void *
Dong-hee Na1937edd2020-06-23 00:53:07 +0900233parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
Larry Hastingsf256c222014-01-25 21:30:37 -0800234{
235 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
236 "pb", "mode", "nice_len", "mf", "depth", NULL};
237 PyObject *id;
238 PyObject *preset_obj;
239 uint32_t preset = LZMA_PRESET_DEFAULT;
240 lzma_options_lzma *options;
241
242 /* First, fill in default values for all the options using a preset.
243 Then, override the defaults with any values given by the caller. */
244
245 preset_obj = PyMapping_GetItemString(spec, "preset");
246 if (preset_obj == NULL) {
Dong-hee Na1937edd2020-06-23 00:53:07 +0900247 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800248 PyErr_Clear();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900249 }
250 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800251 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900252 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800253 } else {
254 int ok = uint32_converter(preset_obj, &preset);
255 Py_DECREF(preset_obj);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900256 if (!ok) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800257 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900258 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800259 }
260
Andy Lester7668a8b2020-03-24 23:26:44 -0500261 options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900262 if (options == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800263 return PyErr_NoMemory();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900264 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800265
266 if (lzma_lzma_preset(options, preset)) {
267 PyMem_Free(options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900268 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
Larry Hastingsf256c222014-01-25 21:30:37 -0800269 return NULL;
270 }
271
Dong-hee Na1937edd2020-06-23 00:53:07 +0900272 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
Larry Hastingsf256c222014-01-25 21:30:37 -0800273 "|OOO&O&O&O&O&O&O&O&", optnames,
274 &id, &preset_obj,
275 uint32_converter, &options->dict_size,
276 uint32_converter, &options->lc,
277 uint32_converter, &options->lp,
278 uint32_converter, &options->pb,
279 lzma_mode_converter, &options->mode,
280 uint32_converter, &options->nice_len,
281 lzma_mf_converter, &options->mf,
282 uint32_converter, &options->depth)) {
283 PyErr_SetString(PyExc_ValueError,
284 "Invalid filter specifier for LZMA filter");
285 PyMem_Free(options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900286 return NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -0800287 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900288
Larry Hastingsf256c222014-01-25 21:30:37 -0800289 return options;
290}
291
292static void *
Dong-hee Na1937edd2020-06-23 00:53:07 +0900293parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
Larry Hastingsf256c222014-01-25 21:30:37 -0800294{
295 static char *optnames[] = {"id", "dist", NULL};
296 PyObject *id;
297 uint32_t dist = 1;
298 lzma_options_delta *options;
299
Dong-hee Na1937edd2020-06-23 00:53:07 +0900300 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
Larry Hastingsf256c222014-01-25 21:30:37 -0800301 &id, uint32_converter, &dist)) {
302 PyErr_SetString(PyExc_ValueError,
303 "Invalid filter specifier for delta filter");
304 return NULL;
305 }
306
Andy Lester7668a8b2020-03-24 23:26:44 -0500307 options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900308 if (options == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800309 return PyErr_NoMemory();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900310 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800311 options->type = LZMA_DELTA_TYPE_BYTE;
312 options->dist = dist;
313 return options;
314}
315
316static void *
Dong-hee Na1937edd2020-06-23 00:53:07 +0900317parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
Larry Hastingsf256c222014-01-25 21:30:37 -0800318{
319 static char *optnames[] = {"id", "start_offset", NULL};
320 PyObject *id;
321 uint32_t start_offset = 0;
322 lzma_options_bcj *options;
323
Dong-hee Na1937edd2020-06-23 00:53:07 +0900324 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
Larry Hastingsf256c222014-01-25 21:30:37 -0800325 &id, uint32_converter, &start_offset)) {
326 PyErr_SetString(PyExc_ValueError,
327 "Invalid filter specifier for BCJ filter");
328 return NULL;
329 }
330
Andy Lester7668a8b2020-03-24 23:26:44 -0500331 options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900332 if (options == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800333 return PyErr_NoMemory();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900334 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800335 options->start_offset = start_offset;
336 return options;
337}
338
339static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900340lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
Larry Hastingsf256c222014-01-25 21:30:37 -0800341{
342 lzma_filter *f = (lzma_filter *)ptr;
343 PyObject *id_obj;
344
345 if (!PyMapping_Check(spec)) {
346 PyErr_SetString(PyExc_TypeError,
347 "Filter specifier must be a dict or dict-like object");
348 return 0;
349 }
350 id_obj = PyMapping_GetItemString(spec, "id");
351 if (id_obj == NULL) {
352 if (PyErr_ExceptionMatches(PyExc_KeyError))
353 PyErr_SetString(PyExc_ValueError,
354 "Filter specifier must have an \"id\" entry");
355 return 0;
356 }
357 f->id = PyLong_AsUnsignedLongLong(id_obj);
358 Py_DECREF(id_obj);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900359 if (PyErr_Occurred()) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800360 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900361 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800362
363 switch (f->id) {
364 case LZMA_FILTER_LZMA1:
365 case LZMA_FILTER_LZMA2:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900366 f->options = parse_filter_spec_lzma(state, spec);
Larry Hastingsf256c222014-01-25 21:30:37 -0800367 return f->options != NULL;
368 case LZMA_FILTER_DELTA:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900369 f->options = parse_filter_spec_delta(state, spec);
Larry Hastingsf256c222014-01-25 21:30:37 -0800370 return f->options != NULL;
371 case LZMA_FILTER_X86:
372 case LZMA_FILTER_POWERPC:
373 case LZMA_FILTER_IA64:
374 case LZMA_FILTER_ARM:
375 case LZMA_FILTER_ARMTHUMB:
376 case LZMA_FILTER_SPARC:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900377 f->options = parse_filter_spec_bcj(state, spec);
Larry Hastingsf256c222014-01-25 21:30:37 -0800378 return f->options != NULL;
379 default:
380 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
381 return 0;
382 }
383}
384
385static void
386free_filter_chain(lzma_filter filters[])
387{
Dong-hee Na1937edd2020-06-23 00:53:07 +0900388 for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800389 PyMem_Free(filters[i].options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900390 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800391}
392
393static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900394parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -0800395{
396 Py_ssize_t i, num_filters;
397
398 num_filters = PySequence_Length(filterspecs);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900399 if (num_filters == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800400 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900401 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800402 if (num_filters > LZMA_FILTERS_MAX) {
403 PyErr_Format(PyExc_ValueError,
404 "Too many filters - liblzma supports a maximum of %d",
405 LZMA_FILTERS_MAX);
406 return -1;
407 }
408
409 for (i = 0; i < num_filters; i++) {
410 int ok = 1;
411 PyObject *spec = PySequence_GetItem(filterspecs, i);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900412 if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800413 ok = 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900414 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800415 Py_XDECREF(spec);
416 if (!ok) {
417 filters[i].id = LZMA_VLI_UNKNOWN;
418 free_filter_chain(filters);
419 return -1;
420 }
421 }
422 filters[num_filters].id = LZMA_VLI_UNKNOWN;
423 return 0;
424}
425
426
/* Filter specifier construction.

   This code handles converting C lzma_filter structs into
   Python-level filter specifiers (represented as dicts). */
431
432static int
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700433spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned long long value)
Larry Hastingsf256c222014-01-25 21:30:37 -0800434{
435 int status;
436 PyObject *value_object;
437
438 value_object = PyLong_FromUnsignedLongLong(value);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900439 if (value_object == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800440 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900441 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800442
443 status = _PyDict_SetItemId(spec, key, value_object);
444 Py_DECREF(value_object);
445 return status;
446}
447
448static PyObject *
449build_filter_spec(const lzma_filter *f)
450{
451 PyObject *spec;
452
453 spec = PyDict_New();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900454 if (spec == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800455 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900456 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800457
458#define ADD_FIELD(SOURCE, FIELD) \
459 do { \
460 _Py_IDENTIFIER(FIELD); \
461 if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
462 goto error;\
463 } while (0)
464
465 ADD_FIELD(f, id);
466
467 switch (f->id) {
468 /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
469 lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
470 dict_size field is used. */
471 case LZMA_FILTER_LZMA1: {
472 lzma_options_lzma *options = f->options;
473 ADD_FIELD(options, lc);
474 ADD_FIELD(options, lp);
475 ADD_FIELD(options, pb);
476 ADD_FIELD(options, dict_size);
477 break;
478 }
479 case LZMA_FILTER_LZMA2: {
480 lzma_options_lzma *options = f->options;
481 ADD_FIELD(options, dict_size);
482 break;
483 }
484 case LZMA_FILTER_DELTA: {
485 lzma_options_delta *options = f->options;
486 ADD_FIELD(options, dist);
487 break;
488 }
489 case LZMA_FILTER_X86:
490 case LZMA_FILTER_POWERPC:
491 case LZMA_FILTER_IA64:
492 case LZMA_FILTER_ARM:
493 case LZMA_FILTER_ARMTHUMB:
494 case LZMA_FILTER_SPARC: {
495 lzma_options_bcj *options = f->options;
496 ADD_FIELD(options, start_offset);
497 break;
498 }
499 default:
500 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
501 goto error;
502 }
503
504#undef ADD_FIELD
505
506 return spec;
507
508error:
509 Py_DECREF(spec);
510 return NULL;
511}
512
513
514/*[clinic input]
Larry Hastingsf256c222014-01-25 21:30:37 -0800515module _lzma
516class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
517class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
518[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300519/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800520
521#include "clinic/_lzmamodule.c.h"
522
523/*[python input]
524
525class lzma_vli_converter(CConverter):
526 type = 'lzma_vli'
527 converter = 'lzma_vli_converter'
528
529class lzma_filter_converter(CConverter):
530 type = 'lzma_filter'
531 converter = 'lzma_filter_converter'
532 c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
533
534 def cleanup(self):
535 name = ensure_legal_c_identifier(self.name)
536 return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
537 ' PyMem_Free(%(name)s.options);\n') % {'name': name}
538
539[python start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800540/*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800541
542
543/* LZMACompressor class. */
544
545static PyObject *
546compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
547{
Larry Hastingsf256c222014-01-25 21:30:37 -0800548 PyObject *result;
Ma Linf9bedb62021-04-28 14:58:54 +0800549 _BlocksOutputBuffer buffer = {.list = NULL};
Dong-hee Na1937edd2020-06-23 00:53:07 +0900550 _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
551 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -0800552
Ma Lin251ffa92021-05-01 07:32:49 +0800553 if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
Ma Linf9bedb62021-04-28 14:58:54 +0800554 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900555 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800556 c->lzs.next_in = data;
557 c->lzs.avail_in = len;
Ma Linf9bedb62021-04-28 14:58:54 +0800558
Larry Hastingsf256c222014-01-25 21:30:37 -0800559 for (;;) {
560 lzma_ret lzret;
561
562 Py_BEGIN_ALLOW_THREADS
563 lzret = lzma_code(&c->lzs, action);
Ma Linf9bedb62021-04-28 14:58:54 +0800564 Py_END_ALLOW_THREADS
565
Dong-hee Na1937edd2020-06-23 00:53:07 +0900566 if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
Serhiy Storchaka04f17f12016-10-31 08:30:09 +0200567 lzret = LZMA_OK; /* That wasn't a real error */
Dong-hee Na1937edd2020-06-23 00:53:07 +0900568 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900569 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800570 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900571 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800572 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
573 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
574 break;
575 } else if (c->lzs.avail_out == 0) {
Ma Lin251ffa92021-05-01 07:32:49 +0800576 if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800577 goto error;
Ma Linf9bedb62021-04-28 14:58:54 +0800578 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800579 }
580 }
Ma Linf9bedb62021-04-28 14:58:54 +0800581
Ma Lin251ffa92021-05-01 07:32:49 +0800582 result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
Ma Linf9bedb62021-04-28 14:58:54 +0800583 if (result != NULL) {
584 return result;
585 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800586
587error:
Ma Lin251ffa92021-05-01 07:32:49 +0800588 OutputBuffer_OnError(&buffer);
Larry Hastingsf256c222014-01-25 21:30:37 -0800589 return NULL;
590}
591
592/*[clinic input]
593_lzma.LZMACompressor.compress
594
Larry Hastingsf256c222014-01-25 21:30:37 -0800595 data: Py_buffer
596 /
597
598Provide data to the compressor object.
599
600Returns a chunk of compressed data if possible, or b'' otherwise.
601
602When you have finished providing data to the compressor, call the
603flush() method to finish the compression process.
604[clinic start generated code]*/
605
606static PyObject *
607_lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300608/*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800609{
610 PyObject *result = NULL;
611
612 ACQUIRE_LOCK(self);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900613 if (self->flushed) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800614 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
Dong-hee Na1937edd2020-06-23 00:53:07 +0900615 }
616 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800617 result = compress(self, data->buf, data->len, LZMA_RUN);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900618 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800619 RELEASE_LOCK(self);
620 return result;
621}
622
623/*[clinic input]
624_lzma.LZMACompressor.flush
625
Larry Hastingsf256c222014-01-25 21:30:37 -0800626Finish the compression process.
627
628Returns the compressed data left in internal buffers.
629
630The compressor object may not be used after this method is called.
631[clinic start generated code]*/
632
633static PyObject *
634_lzma_LZMACompressor_flush_impl(Compressor *self)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300635/*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800636{
637 PyObject *result = NULL;
638
639 ACQUIRE_LOCK(self);
640 if (self->flushed) {
641 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
642 } else {
643 self->flushed = 1;
644 result = compress(self, NULL, 0, LZMA_FINISH);
645 }
646 RELEASE_LOCK(self);
647 return result;
648}
649
Larry Hastingsf256c222014-01-25 21:30:37 -0800650static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900651Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
652 int check, uint32_t preset, PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -0800653{
654 lzma_ret lzret;
655
656 if (filterspecs == Py_None) {
657 lzret = lzma_easy_encoder(lzs, preset, check);
658 } else {
659 lzma_filter filters[LZMA_FILTERS_MAX + 1];
660
Dong-hee Na1937edd2020-06-23 00:53:07 +0900661 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800662 return -1;
663 lzret = lzma_stream_encoder(lzs, filters, check);
664 free_filter_chain(filters);
665 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900666 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800667 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900668 }
669 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800670 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900671 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800672}
673
674static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900675Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -0800676{
677 lzma_ret lzret;
678
679 if (filterspecs == Py_None) {
680 lzma_options_lzma options;
681
682 if (lzma_lzma_preset(&options, preset)) {
Dong-hee Na1937edd2020-06-23 00:53:07 +0900683 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
Larry Hastingsf256c222014-01-25 21:30:37 -0800684 return -1;
685 }
686 lzret = lzma_alone_encoder(lzs, &options);
687 } else {
688 lzma_filter filters[LZMA_FILTERS_MAX + 1];
689
Dong-hee Na1937edd2020-06-23 00:53:07 +0900690 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800691 return -1;
692 if (filters[0].id == LZMA_FILTER_LZMA1 &&
693 filters[1].id == LZMA_VLI_UNKNOWN) {
694 lzret = lzma_alone_encoder(lzs, filters[0].options);
695 } else {
696 PyErr_SetString(PyExc_ValueError,
697 "Invalid filter chain for FORMAT_ALONE - "
698 "must be a single LZMA1 filter");
699 lzret = LZMA_PROG_ERROR;
700 }
701 free_filter_chain(filters);
702 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900703 if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800704 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900705 }
706 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800707 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900708 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800709}
710
711static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900712Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -0800713{
714 lzma_filter filters[LZMA_FILTERS_MAX + 1];
715 lzma_ret lzret;
716
717 if (filterspecs == Py_None) {
718 PyErr_SetString(PyExc_ValueError,
719 "Must specify filters for FORMAT_RAW");
720 return -1;
721 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900722 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800723 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900724 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800725 lzret = lzma_raw_encoder(lzs, filters);
726 free_filter_chain(filters);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900727 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800728 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900729 }
730 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800731 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900732 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800733}
734
735/*[-clinic input]
736_lzma.LZMACompressor.__init__
737
Larry Hastingsf256c222014-01-25 21:30:37 -0800738 format: int(c_default="FORMAT_XZ") = FORMAT_XZ
739 The container format to use for the output. This can
740 be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
741
742 check: int(c_default="-1") = unspecified
743 The integrity check to use. For FORMAT_XZ, the default
Martin Pantere26da7c2016-06-02 10:07:09 +0000744 is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity
Larry Hastingsf256c222014-01-25 21:30:37 -0800745 checks; for these formats, check must be omitted, or be CHECK_NONE.
746
747 preset: object = None
748 If provided should be an integer in the range 0-9, optionally
749 OR-ed with the constant PRESET_EXTREME.
750
751 filters: object = None
752 If provided should be a sequence of dicts. Each dict should
753 have an entry for "id" indicating the ID of the filter, plus
754 additional entries for options to the filter.
755
756Create a compressor object for compressing data incrementally.
757
758The settings used by the compressor can be specified either as a
759preset compression level (with the 'preset' argument), or in detail
760as a custom filter chain (with the 'filters' argument). For FORMAT_XZ
761and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
762level. For FORMAT_RAW, the caller must always specify a filter chain;
763the raw compressor does not support preset compression levels.
764
765For one-shot compression, use the compress() function instead.
766[-clinic start generated code]*/
767static int
768Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
769{
770 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
771 int format = FORMAT_XZ;
772 int check = -1;
773 uint32_t preset = LZMA_PRESET_DEFAULT;
774 PyObject *preset_obj = Py_None;
775 PyObject *filterspecs = Py_None;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900776 _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
777 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -0800778 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
779 "|iiOO:LZMACompressor", arg_names,
780 &format, &check, &preset_obj,
Dong-hee Na1937edd2020-06-23 00:53:07 +0900781 &filterspecs)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800782 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900783 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800784
785 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
786 PyErr_SetString(PyExc_ValueError,
787 "Integrity checks are only supported by FORMAT_XZ");
788 return -1;
789 }
790
791 if (preset_obj != Py_None && filterspecs != Py_None) {
792 PyErr_SetString(PyExc_ValueError,
793 "Cannot specify both preset and filter chain");
794 return -1;
795 }
796
Dong-hee Na1937edd2020-06-23 00:53:07 +0900797 if (preset_obj != Py_None) {
798 if (!uint32_converter(preset_obj, &preset)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800799 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900800 }
801 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800802
803 self->alloc.opaque = NULL;
804 self->alloc.alloc = PyLzma_Malloc;
805 self->alloc.free = PyLzma_Free;
806 self->lzs.allocator = &self->alloc;
807
Larry Hastingsf256c222014-01-25 21:30:37 -0800808 self->lock = PyThread_allocate_lock();
809 if (self->lock == NULL) {
810 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
811 return -1;
812 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800813
814 self->flushed = 0;
815 switch (format) {
816 case FORMAT_XZ:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900817 if (check == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800818 check = LZMA_CHECK_CRC64;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900819 }
820 if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800821 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900822 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800823 return 0;
824
825 case FORMAT_ALONE:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900826 if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800827 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900828 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800829 return 0;
830
831 case FORMAT_RAW:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900832 if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800833 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900834 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800835 return 0;
836
837 default:
838 PyErr_Format(PyExc_ValueError,
839 "Invalid container format: %d", format);
840 break;
841 }
842
Larry Hastingsf256c222014-01-25 21:30:37 -0800843 PyThread_free_lock(self->lock);
844 self->lock = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -0800845 return -1;
846}
847
848static void
849Compressor_dealloc(Compressor *self)
850{
851 lzma_end(&self->lzs);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900852 if (self->lock != NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800853 PyThread_free_lock(self->lock);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900854 }
855 PyTypeObject *tp = Py_TYPE(self);
856 tp->tp_free((PyObject *)self);
857 Py_DECREF(tp);
858}
859
Larry Hastingsf256c222014-01-25 21:30:37 -0800860static PyMethodDef Compressor_methods[] = {
861 _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
862 _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
Larry Hastingsf256c222014-01-25 21:30:37 -0800863 {NULL}
864};
865
Dong-hee Na1937edd2020-06-23 00:53:07 +0900866static int
867Compressor_traverse(Compressor *self, visitproc visit, void *arg)
868{
869 Py_VISIT(Py_TYPE(self));
870 return 0;
871}
872
Larry Hastingsf256c222014-01-25 21:30:37 -0800873PyDoc_STRVAR(Compressor_doc,
874"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
875"\n"
876"Create a compressor object for compressing data incrementally.\n"
877"\n"
878"format specifies the container format to use for the output. This can\n"
879"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
880"\n"
881"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
Ville Skyttä49b27342017-08-03 09:00:59 +0300882"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
Larry Hastingsf256c222014-01-25 21:30:37 -0800883"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
884"\n"
885"The settings used by the compressor can be specified either as a\n"
886"preset compression level (with the 'preset' argument), or in detail\n"
887"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
888"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
889"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
890"the raw compressor does not support preset compression levels.\n"
891"\n"
892"preset (if provided) should be an integer in the range 0-9, optionally\n"
893"OR-ed with the constant PRESET_EXTREME.\n"
894"\n"
895"filters (if provided) should be a sequence of dicts. Each dict should\n"
896"have an entry for \"id\" indicating the ID of the filter, plus\n"
897"additional entries for options to the filter.\n"
898"\n"
899"For one-shot compression, use the compress() function instead.\n");
900
Dong-hee Na1937edd2020-06-23 00:53:07 +0900901static PyType_Slot lzma_compressor_type_slots[] = {
902 {Py_tp_dealloc, Compressor_dealloc},
903 {Py_tp_methods, Compressor_methods},
904 {Py_tp_init, Compressor_init},
905 {Py_tp_new, PyType_GenericNew},
906 {Py_tp_doc, (char *)Compressor_doc},
907 {Py_tp_traverse, Compressor_traverse},
908 {0, 0}
Larry Hastingsf256c222014-01-25 21:30:37 -0800909};
910
Dong-hee Na1937edd2020-06-23 00:53:07 +0900911static PyType_Spec lzma_compressor_type_spec = {
912 .name = "_lzma.LZMACompressor",
913 .basicsize = sizeof(Compressor),
914 // Calling PyType_GetModuleState() on a subclass is not safe.
915 // lzma_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
916 // which prevents to create a subclass.
917 // So calling PyType_GetModuleState() in this file is always safe.
918 .flags = Py_TPFLAGS_DEFAULT,
919 .slots = lzma_compressor_type_slots,
920};
Larry Hastingsf256c222014-01-25 21:30:37 -0800921
922/* LZMADecompressor class. */
923
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100924/* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
925 buffer is allocated dynamically and returned. At most max_length bytes are
926 returned, so some of the input may not be consumed. d->lzs.next_in and
927 d->lzs.avail_in are updated to reflect the consumed input. */
928static PyObject*
929decompress_buf(Decompressor *d, Py_ssize_t max_length)
Larry Hastingsf256c222014-01-25 21:30:37 -0800930{
Larry Hastingsf256c222014-01-25 21:30:37 -0800931 PyObject *result;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100932 lzma_stream *lzs = &d->lzs;
Ma Linf9bedb62021-04-28 14:58:54 +0800933 _BlocksOutputBuffer buffer = {.list = NULL};
Dong-hee Na1937edd2020-06-23 00:53:07 +0900934 _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
935 assert(state != NULL);
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200936
Ma Lin251ffa92021-05-01 07:32:49 +0800937 if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
Ma Linf9bedb62021-04-28 14:58:54 +0800938 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900939 }
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200940
Larry Hastingsf256c222014-01-25 21:30:37 -0800941 for (;;) {
942 lzma_ret lzret;
943
944 Py_BEGIN_ALLOW_THREADS
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100945 lzret = lzma_code(lzs, LZMA_RUN);
Ma Linf9bedb62021-04-28 14:58:54 +0800946 Py_END_ALLOW_THREADS
947
Dong-hee Na1937edd2020-06-23 00:53:07 +0900948 if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
animalize4ffd05d2019-09-12 22:20:37 +0800949 lzret = LZMA_OK; /* That wasn't a real error */
Dong-hee Na1937edd2020-06-23 00:53:07 +0900950 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900951 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800952 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900953 }
954 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800955 d->check = lzma_get_check(&d->lzs);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900956 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800957 if (lzret == LZMA_STREAM_END) {
958 d->eof = 1;
Larry Hastingsf256c222014-01-25 21:30:37 -0800959 break;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100960 } else if (lzs->avail_out == 0) {
animalize4ffd05d2019-09-12 22:20:37 +0800961 /* Need to check lzs->avail_out before lzs->avail_in.
962 Maybe lzs's internal state still have a few bytes
963 can be output, grow the output buffer and continue
964 if max_lengh < 0. */
Ma Lin251ffa92021-05-01 07:32:49 +0800965 if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100966 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900967 }
Ma Lin251ffa92021-05-01 07:32:49 +0800968 if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800969 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900970 }
animalize4ffd05d2019-09-12 22:20:37 +0800971 } else if (lzs->avail_in == 0) {
972 break;
Larry Hastingsf256c222014-01-25 21:30:37 -0800973 }
974 }
Ma Linf9bedb62021-04-28 14:58:54 +0800975
Ma Lin251ffa92021-05-01 07:32:49 +0800976 result = OutputBuffer_Finish(&buffer, lzs->avail_out);
Ma Linf9bedb62021-04-28 14:58:54 +0800977 if (result != NULL) {
978 return result;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900979 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100980
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100981error:
Ma Lin251ffa92021-05-01 07:32:49 +0800982 OutputBuffer_OnError(&buffer);
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100983 return NULL;
984}
985
986static PyObject *
987decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
988{
989 char input_buffer_in_use;
990 PyObject *result;
991 lzma_stream *lzs = &d->lzs;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200992
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100993 /* Prepend unconsumed input if necessary */
994 if (lzs->next_in != NULL) {
995 size_t avail_now, avail_total;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200996
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100997 /* Number of bytes we can append to input buffer */
998 avail_now = (d->input_buffer + d->input_buffer_size)
999 - (lzs->next_in + lzs->avail_in);
1000
1001 /* Number of bytes we can append if we move existing
1002 contents to beginning of buffer (overwriting
1003 consumed input) */
1004 avail_total = d->input_buffer_size - lzs->avail_in;
1005
1006 if (avail_total < len) {
1007 size_t offset = lzs->next_in - d->input_buffer;
1008 uint8_t *tmp;
1009 size_t new_size = d->input_buffer_size + len - avail_now;
1010
1011 /* Assign to temporary variable first, so we don't
1012 lose address of allocated buffer if realloc fails */
1013 tmp = PyMem_Realloc(d->input_buffer, new_size);
1014 if (tmp == NULL) {
1015 PyErr_SetNone(PyExc_MemoryError);
1016 return NULL;
1017 }
1018 d->input_buffer = tmp;
1019 d->input_buffer_size = new_size;
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001020
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001021 lzs->next_in = d->input_buffer + offset;
1022 }
1023 else if (avail_now < len) {
1024 memmove(d->input_buffer, lzs->next_in,
1025 lzs->avail_in);
1026 lzs->next_in = d->input_buffer;
1027 }
1028 memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1029 lzs->avail_in += len;
1030 input_buffer_in_use = 1;
1031 }
1032 else {
1033 lzs->next_in = data;
1034 lzs->avail_in = len;
1035 input_buffer_in_use = 0;
1036 }
1037
1038 result = decompress_buf(d, max_length);
Serhiy Storchakac0b70372016-09-27 20:14:26 +03001039 if (result == NULL) {
1040 lzs->next_in = NULL;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001041 return NULL;
Serhiy Storchakac0b70372016-09-27 20:14:26 +03001042 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001043
1044 if (d->eof) {
1045 d->needs_input = 0;
1046 if (lzs->avail_in > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03001047 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +02001048 PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
Dong-hee Na1937edd2020-06-23 00:53:07 +09001049 if (d->unused_data == NULL) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001050 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001051 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001052 }
1053 }
1054 else if (lzs->avail_in == 0) {
1055 lzs->next_in = NULL;
animalize4ffd05d2019-09-12 22:20:37 +08001056
1057 if (lzs->avail_out == 0) {
1058 /* (avail_in==0 && avail_out==0)
1059 Maybe lzs's internal state still have a few bytes can
1060 be output, try to output them next time. */
1061 d->needs_input = 0;
1062
Ma Linf9bedb62021-04-28 14:58:54 +08001063 /* If max_length < 0, lzs->avail_out always > 0 */
animalize4ffd05d2019-09-12 22:20:37 +08001064 assert(max_length >= 0);
1065 } else {
1066 /* Input buffer exhausted, output buffer has space. */
1067 d->needs_input = 1;
1068 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001069 }
1070 else {
1071 d->needs_input = 0;
1072
1073 /* If we did not use the input buffer, we now have
1074 to copy the tail from the caller's buffer into the
1075 input buffer */
1076 if (!input_buffer_in_use) {
1077
1078 /* Discard buffer if it's too small
1079 (resizing it may needlessly copy the current contents) */
1080 if (d->input_buffer != NULL &&
1081 d->input_buffer_size < lzs->avail_in) {
1082 PyMem_Free(d->input_buffer);
1083 d->input_buffer = NULL;
1084 }
1085
1086 /* Allocate if necessary */
1087 if (d->input_buffer == NULL) {
1088 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1089 if (d->input_buffer == NULL) {
1090 PyErr_SetNone(PyExc_MemoryError);
1091 goto error;
1092 }
1093 d->input_buffer_size = lzs->avail_in;
1094 }
1095
1096 /* Copy tail */
1097 memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1098 lzs->next_in = d->input_buffer;
1099 }
1100 }
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001101
Larry Hastingsf256c222014-01-25 21:30:37 -08001102 return result;
1103
1104error:
1105 Py_XDECREF(result);
1106 return NULL;
1107}
1108
1109/*[clinic input]
1110_lzma.LZMADecompressor.decompress
1111
Larry Hastingsf256c222014-01-25 21:30:37 -08001112 data: Py_buffer
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001113 max_length: Py_ssize_t=-1
Larry Hastingsf256c222014-01-25 21:30:37 -08001114
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001115Decompress *data*, returning uncompressed data as bytes.
Larry Hastingsf256c222014-01-25 21:30:37 -08001116
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001117If *max_length* is nonnegative, returns at most *max_length* bytes of
1118decompressed data. If this limit is reached and further output can be
1119produced, *self.needs_input* will be set to ``False``. In this case, the next
1120call to *decompress()* may provide *data* as b'' to obtain more of the output.
Larry Hastingsf256c222014-01-25 21:30:37 -08001121
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001122If all of the input data was decompressed and returned (either because this
1123was less than *max_length* bytes, or because *max_length* was negative),
1124*self.needs_input* will be set to True.
1125
1126Attempting to decompress data after the end of stream is reached raises an
1127EOFError. Any data found after the end of the stream is ignored and saved in
1128the unused_data attribute.
Larry Hastingsf256c222014-01-25 21:30:37 -08001129[clinic start generated code]*/
1130
1131static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001132_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1133 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001134/*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001135{
1136 PyObject *result = NULL;
1137
1138 ACQUIRE_LOCK(self);
1139 if (self->eof)
1140 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1141 else
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001142 result = decompress(self, data->buf, data->len, max_length);
Larry Hastingsf256c222014-01-25 21:30:37 -08001143 RELEASE_LOCK(self);
1144 return result;
1145}
1146
Larry Hastingsf256c222014-01-25 21:30:37 -08001147static int
Dong-hee Na1937edd2020-06-23 00:53:07 +09001148Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -08001149{
1150 lzma_filter filters[LZMA_FILTERS_MAX + 1];
1151 lzma_ret lzret;
1152
Dong-hee Na1937edd2020-06-23 00:53:07 +09001153 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001154 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001155 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001156 lzret = lzma_raw_decoder(lzs, filters);
1157 free_filter_chain(filters);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001158 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001159 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001160 }
1161 else {
Larry Hastingsf256c222014-01-25 21:30:37 -08001162 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001163 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001164}
1165
1166/*[clinic input]
1167_lzma.LZMADecompressor.__init__
1168
Larry Hastingsf256c222014-01-25 21:30:37 -08001169 format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1170 Specifies the container format of the input stream. If this is
1171 FORMAT_AUTO (the default), the decompressor will automatically detect
1172 whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with
1173 FORMAT_RAW cannot be autodetected.
1174
1175 memlimit: object = None
1176 Limit the amount of memory used by the decompressor. This will cause
1177 decompression to fail if the input cannot be decompressed within the
1178 given limit.
1179
1180 filters: object = None
1181 A custom filter chain. This argument is required for FORMAT_RAW, and
1182 not accepted with any other format. When provided, this should be a
1183 sequence of dicts, each indicating the ID and options for a single
1184 filter.
1185
1186Create a decompressor object for decompressing data incrementally.
1187
1188For one-shot decompression, use the decompress() function instead.
1189[clinic start generated code]*/
1190
1191static int
Larry Hastings89964c42015-04-14 18:07:59 -04001192_lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
1193 PyObject *memlimit, PyObject *filters)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001194/*[clinic end generated code: output=3e1821f8aa36564c input=81fe684a6c2f8a27]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001195{
1196 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1197 uint64_t memlimit_ = UINT64_MAX;
1198 lzma_ret lzret;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001199 _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
1200 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -08001201
1202 if (memlimit != Py_None) {
1203 if (format == FORMAT_RAW) {
1204 PyErr_SetString(PyExc_ValueError,
1205 "Cannot specify memory limit with FORMAT_RAW");
1206 return -1;
1207 }
1208 memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001209 if (PyErr_Occurred()) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001210 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001211 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001212 }
1213
1214 if (format == FORMAT_RAW && filters == Py_None) {
1215 PyErr_SetString(PyExc_ValueError,
1216 "Must specify filters for FORMAT_RAW");
1217 return -1;
1218 } else if (format != FORMAT_RAW && filters != Py_None) {
1219 PyErr_SetString(PyExc_ValueError,
1220 "Cannot specify filters except with FORMAT_RAW");
1221 return -1;
1222 }
1223
1224 self->alloc.opaque = NULL;
1225 self->alloc.alloc = PyLzma_Malloc;
1226 self->alloc.free = PyLzma_Free;
1227 self->lzs.allocator = &self->alloc;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001228 self->lzs.next_in = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -08001229
Victor Stinner9b7cf752018-06-23 10:35:23 +02001230 PyThread_type_lock lock = PyThread_allocate_lock();
1231 if (lock == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001232 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1233 return -1;
1234 }
Victor Stinner9b7cf752018-06-23 10:35:23 +02001235 if (self->lock != NULL) {
1236 PyThread_free_lock(self->lock);
1237 }
1238 self->lock = lock;
Larry Hastingsf256c222014-01-25 21:30:37 -08001239
1240 self->check = LZMA_CHECK_UNKNOWN;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001241 self->needs_input = 1;
1242 self->input_buffer = NULL;
1243 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +02001244 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Dong-hee Na1937edd2020-06-23 00:53:07 +09001245 if (self->unused_data == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001246 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001247 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001248
1249 switch (format) {
1250 case FORMAT_AUTO:
1251 lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001252 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001253 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001254 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001255 return 0;
1256
1257 case FORMAT_XZ:
1258 lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001259 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001260 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001261 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001262 return 0;
1263
1264 case FORMAT_ALONE:
1265 self->check = LZMA_CHECK_NONE;
1266 lzret = lzma_alone_decoder(&self->lzs, memlimit_);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001267 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001268 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001269 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001270 return 0;
1271
1272 case FORMAT_RAW:
1273 self->check = LZMA_CHECK_NONE;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001274 if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001275 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001276 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001277 return 0;
1278
1279 default:
1280 PyErr_Format(PyExc_ValueError,
1281 "Invalid container format: %d", format);
1282 break;
1283 }
1284
1285error:
1286 Py_CLEAR(self->unused_data);
Larry Hastingsf256c222014-01-25 21:30:37 -08001287 PyThread_free_lock(self->lock);
1288 self->lock = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -08001289 return -1;
1290}
1291
1292static void
1293Decompressor_dealloc(Decompressor *self)
1294{
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001295 if(self->input_buffer != NULL)
1296 PyMem_Free(self->input_buffer);
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001297
Larry Hastingsf256c222014-01-25 21:30:37 -08001298 lzma_end(&self->lzs);
1299 Py_CLEAR(self->unused_data);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001300 if (self->lock != NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001301 PyThread_free_lock(self->lock);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001302 }
1303 PyTypeObject *tp = Py_TYPE(self);
1304 tp->tp_free((PyObject *)self);
1305 Py_DECREF(tp);
1306}
1307
1308static int
1309Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
1310{
1311 Py_VISIT(Py_TYPE(self));
1312 return 0;
1313}
1314
Larry Hastingsf256c222014-01-25 21:30:37 -08001315static PyMethodDef Decompressor_methods[] = {
1316 _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
Larry Hastingsf256c222014-01-25 21:30:37 -08001317 {NULL}
1318};
1319
1320PyDoc_STRVAR(Decompressor_check_doc,
1321"ID of the integrity check used by the input stream.");
1322
1323PyDoc_STRVAR(Decompressor_eof_doc,
1324"True if the end-of-stream marker has been reached.");
1325
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001326PyDoc_STRVAR(Decompressor_needs_input_doc,
1327"True if more input is needed before more decompressed data can be produced.");
1328
Larry Hastingsf256c222014-01-25 21:30:37 -08001329PyDoc_STRVAR(Decompressor_unused_data_doc,
1330"Data found after the end of the compressed stream.");
1331
1332static PyMemberDef Decompressor_members[] = {
1333 {"check", T_INT, offsetof(Decompressor, check), READONLY,
1334 Decompressor_check_doc},
1335 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1336 Decompressor_eof_doc},
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001337 {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
1338 Decompressor_needs_input_doc},
Larry Hastingsf256c222014-01-25 21:30:37 -08001339 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1340 Decompressor_unused_data_doc},
1341 {NULL}
1342};
1343
Dong-hee Na1937edd2020-06-23 00:53:07 +09001344static PyType_Slot lzma_decompressor_type_slots[] = {
1345 {Py_tp_dealloc, Decompressor_dealloc},
1346 {Py_tp_methods, Decompressor_methods},
1347 {Py_tp_init, _lzma_LZMADecompressor___init__},
1348 {Py_tp_new, PyType_GenericNew},
1349 {Py_tp_doc, (char *)_lzma_LZMADecompressor___init____doc__},
1350 {Py_tp_traverse, Decompressor_traverse},
1351 {Py_tp_members, Decompressor_members},
1352 {0, 0}
1353};
1354
1355static PyType_Spec lzma_decompressor_type_spec = {
1356 .name = "_lzma.LZMADecompressor",
1357 .basicsize = sizeof(Decompressor),
1358 // Calling PyType_GetModuleState() on a subclass is not safe.
1359 // lzma_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
1360 // which prevents to create a subclass.
1361 // So calling PyType_GetModuleState() in this file is always safe.
1362 .flags = Py_TPFLAGS_DEFAULT,
1363 .slots = lzma_decompressor_type_slots,
Larry Hastingsf256c222014-01-25 21:30:37 -08001364};
1365
1366
1367/* Module-level functions. */
1368
1369/*[clinic input]
1370_lzma.is_check_supported
1371 check_id: int
1372 /
1373
1374Test whether the given integrity check is supported.
1375
1376Always returns True for CHECK_NONE and CHECK_CRC32.
1377[clinic start generated code]*/
1378
1379static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001380_lzma_is_check_supported_impl(PyObject *module, int check_id)
1381/*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001382{
1383 return PyBool_FromLong(lzma_check_is_supported(check_id));
1384}
1385
Dong-hee Na1937edd2020-06-23 00:53:07 +09001386PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
1387"_encode_filter_properties($module, filter, /)\n"
1388"--\n"
1389"\n"
1390"Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
1391"\n"
1392"The result does not include the filter ID itself, only the options.");
Larry Hastingsf256c222014-01-25 21:30:37 -08001393
Dong-hee Na1937edd2020-06-23 00:53:07 +09001394#define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF \
1395 {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},
Larry Hastingsf256c222014-01-25 21:30:37 -08001396
Dong-hee Na1937edd2020-06-23 00:53:07 +09001397static PyObject *
1398_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
Larry Hastingsf256c222014-01-25 21:30:37 -08001399
Dong-hee Na1937edd2020-06-23 00:53:07 +09001400static PyObject *
1401_lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1402{
1403 PyObject *return_value = NULL;
1404 lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1405 _lzma_state *state = get_lzma_state(module);
1406 assert(state != NULL);
1407 if (!lzma_filter_converter(state, arg, &filter)) {
1408 goto exit;
1409 }
1410 return_value = _lzma__encode_filter_properties_impl(module, filter);
1411
1412exit:
1413 /* Cleanup for filter */
1414 if (filter.id != LZMA_VLI_UNKNOWN) {
1415 PyMem_Free(filter.options);
1416 }
1417
1418 return return_value;
1419}
Larry Hastingsf256c222014-01-25 21:30:37 -08001420
1421static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001422_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
Larry Hastingsf256c222014-01-25 21:30:37 -08001423{
1424 lzma_ret lzret;
1425 uint32_t encoded_size;
1426 PyObject *result = NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001427 _lzma_state *state = get_lzma_state(module);
1428 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -08001429
1430 lzret = lzma_properties_size(&encoded_size, &filter);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001431 if (catch_lzma_error(state, lzret))
Larry Hastingsf256c222014-01-25 21:30:37 -08001432 goto error;
1433
1434 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1435 if (result == NULL)
1436 goto error;
1437
1438 lzret = lzma_properties_encode(
1439 &filter, (uint8_t *)PyBytes_AS_STRING(result));
Dong-hee Na1937edd2020-06-23 00:53:07 +09001440 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001441 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001442 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001443
1444 return result;
1445
1446error:
1447 Py_XDECREF(result);
1448 return NULL;
1449}
1450
1451
1452/*[clinic input]
1453_lzma._decode_filter_properties
1454 filter_id: lzma_vli
1455 encoded_props: Py_buffer
1456 /
1457
1458Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1459
1460The result does not include the filter ID itself, only the options.
1461[clinic start generated code]*/
1462
1463static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001464_lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
Larry Hastings89964c42015-04-14 18:07:59 -04001465 Py_buffer *encoded_props)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001466/*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001467{
1468 lzma_filter filter;
1469 lzma_ret lzret;
1470 PyObject *result = NULL;
1471 filter.id = filter_id;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001472 _lzma_state *state = get_lzma_state(module);
1473 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -08001474
1475 lzret = lzma_properties_decode(
1476 &filter, NULL, encoded_props->buf, encoded_props->len);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001477 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001478 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001479 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001480
1481 result = build_filter_spec(&filter);
1482
1483 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1484 allocated by lzma_properties_decode() using the default allocator. */
1485 free(filter.options);
1486 return result;
1487}
1488
Larry Hastingsf256c222014-01-25 21:30:37 -08001489/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1490 would not work correctly on platforms with 32-bit longs. */
1491static int
Benjamin Petersonaf580df2016-09-06 10:46:49 -07001492module_add_int_constant(PyObject *m, const char *name, long long value)
Larry Hastingsf256c222014-01-25 21:30:37 -08001493{
1494 PyObject *o = PyLong_FromLongLong(value);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001495 if (o == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001496 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001497 }
1498 if (PyModule_AddObject(m, name, o) == 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001499 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001500 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001501 Py_DECREF(o);
1502 return -1;
1503}
1504
Dong-hee Na1937edd2020-06-23 00:53:07 +09001505static int
1506lzma_exec(PyObject *module)
1507{
1508#define ADD_INT_PREFIX_MACRO(module, macro) \
1509 do { \
1510 if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) { \
1511 return -1; \
1512 } \
1513 } while(0)
1514
1515#define ADD_INT_MACRO(module, macro) \
1516 do { \
1517 if (PyModule_AddIntMacro(module, macro) < 0) { \
1518 return -1; \
1519 } \
1520 } while (0)
1521
1522
1523 _lzma_state *state = get_lzma_state(module);
1524
1525 state->empty_tuple = PyTuple_New(0);
1526 if (state->empty_tuple == NULL) {
1527 return -1;
1528 }
1529
1530 ADD_INT_MACRO(module, FORMAT_AUTO);
1531 ADD_INT_MACRO(module, FORMAT_XZ);
1532 ADD_INT_MACRO(module, FORMAT_ALONE);
1533 ADD_INT_MACRO(module, FORMAT_RAW);
1534 ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
1535 ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
1536 ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
1537 ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
1538 ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
1539 ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
1540 ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
1541 ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
1542 ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
1543 ADD_INT_PREFIX_MACRO(module, FILTER_X86);
1544 ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
1545 ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
1546 ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
1547 ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
1548 ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
1549 ADD_INT_PREFIX_MACRO(module, MF_HC3);
1550 ADD_INT_PREFIX_MACRO(module, MF_HC4);
1551 ADD_INT_PREFIX_MACRO(module, MF_BT2);
1552 ADD_INT_PREFIX_MACRO(module, MF_BT3);
1553 ADD_INT_PREFIX_MACRO(module, MF_BT4);
1554 ADD_INT_PREFIX_MACRO(module, MODE_FAST);
1555 ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
1556 ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
1557 ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
1558
1559 state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1560 if (state->error == NULL) {
1561 return -1;
1562 }
1563
1564 if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
1565 return -1;
1566 }
1567
1568
1569 state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1570 &lzma_compressor_type_spec, NULL);
1571 if (state->lzma_compressor_type == NULL) {
1572 return -1;
1573 }
1574
1575 if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
1576 return -1;
1577 }
1578
1579 state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1580 &lzma_decompressor_type_spec, NULL);
1581 if (state->lzma_decompressor_type == NULL) {
1582 return -1;
1583 }
1584
1585 if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
1586 return -1;
1587 }
1588
1589 return 0;
1590}
1591
1592static PyMethodDef lzma_methods[] = {
1593 _LZMA_IS_CHECK_SUPPORTED_METHODDEF
1594 _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
1595 _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1596 {NULL}
1597};
1598
1599static PyModuleDef_Slot lzma_slots[] = {
1600 {Py_mod_exec, lzma_exec},
1601 {0, NULL}
1602};
1603
1604static int
1605lzma_traverse(PyObject *module, visitproc visit, void *arg)
1606{
1607 _lzma_state *state = get_lzma_state(module);
1608 Py_VISIT(state->lzma_compressor_type);
1609 Py_VISIT(state->lzma_decompressor_type);
1610 Py_VISIT(state->error);
1611 Py_VISIT(state->empty_tuple);
1612 return 0;
1613}
1614
1615static int
1616lzma_clear(PyObject *module)
1617{
1618 _lzma_state *state = get_lzma_state(module);
1619 Py_CLEAR(state->lzma_compressor_type);
1620 Py_CLEAR(state->lzma_decompressor_type);
1621 Py_CLEAR(state->error);
1622 Py_CLEAR(state->empty_tuple);
1623 return 0;
1624}
1625
1626static void
1627lzma_free(void *module)
1628{
1629 lzma_clear((PyObject *)module);
1630}
1631
1632static PyModuleDef _lzmamodule = {
1633 PyModuleDef_HEAD_INIT,
1634 .m_name = "_lzma",
1635 .m_size = sizeof(_lzma_state),
1636 .m_methods = lzma_methods,
1637 .m_slots = lzma_slots,
1638 .m_traverse = lzma_traverse,
1639 .m_clear = lzma_clear,
1640 .m_free = lzma_free,
1641};
Larry Hastingsf256c222014-01-25 21:30:37 -08001642
1643PyMODINIT_FUNC
1644PyInit__lzma(void)
1645{
Dong-hee Na1937edd2020-06-23 00:53:07 +09001646 return PyModuleDef_Init(&_lzmamodule);
Larry Hastingsf256c222014-01-25 21:30:37 -08001647}