/* _lzma - Low-level Python interface to liblzma.

   Initial implementation by Per Øyvind Karlsen.
   Rewritten by Nadeem Vawda.

*/
7
8#define PY_SSIZE_T_CLEAN
9
10#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020011#include "structmember.h" // PyMemberDef
Larry Hastingsf256c222014-01-25 21:30:37 -080012
13#include <stdarg.h>
14#include <string.h>
15
16#include <lzma.h>
17
/* Acquire obj->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking acquire performed, bracketed by
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, NOWAIT_LOCK)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, WAIT_LOCK); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release obj->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
Larry Hastingsf256c222014-01-25 21:30:37 -080025
Dong-hee Na1937edd2020-06-23 00:53:07 +090026typedef struct {
27 PyTypeObject *lzma_compressor_type;
28 PyTypeObject *lzma_decompressor_type;
29 PyObject *error;
30 PyObject *empty_tuple;
31} _lzma_state;
32
33static inline _lzma_state*
34get_lzma_state(PyObject *module)
35{
36 void *state = PyModule_GetState(module);
37 assert(state != NULL);
38 return (_lzma_state *)state;
39}
Larry Hastingsf256c222014-01-25 21:30:37 -080040
41/* Container formats: */
enum {
    FORMAT_AUTO  = 0,
    FORMAT_XZ    = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW   = 3,
};

/* One past the largest check ID known to liblzma. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
50
51
52typedef struct {
53 PyObject_HEAD
54 lzma_allocator alloc;
55 lzma_stream lzs;
56 int flushed;
Larry Hastingsf256c222014-01-25 21:30:37 -080057 PyThread_type_lock lock;
Larry Hastingsf256c222014-01-25 21:30:37 -080058} Compressor;
59
60typedef struct {
61 PyObject_HEAD
62 lzma_allocator alloc;
63 lzma_stream lzs;
64 int check;
65 char eof;
66 PyObject *unused_data;
Antoine Pitrou26795ba2015-01-17 16:22:18 +010067 char needs_input;
68 uint8_t *input_buffer;
69 size_t input_buffer_size;
Larry Hastingsf256c222014-01-25 21:30:37 -080070 PyThread_type_lock lock;
Larry Hastingsf256c222014-01-25 21:30:37 -080071} Decompressor;
72
Larry Hastingsf256c222014-01-25 21:30:37 -080073/* Helper functions. */
74
75static int
Dong-hee Na1937edd2020-06-23 00:53:07 +090076catch_lzma_error(_lzma_state *state, lzma_ret lzret)
Larry Hastingsf256c222014-01-25 21:30:37 -080077{
78 switch (lzret) {
79 case LZMA_OK:
80 case LZMA_GET_CHECK:
81 case LZMA_NO_CHECK:
82 case LZMA_STREAM_END:
83 return 0;
84 case LZMA_UNSUPPORTED_CHECK:
Dong-hee Na1937edd2020-06-23 00:53:07 +090085 PyErr_SetString(state->error, "Unsupported integrity check");
Larry Hastingsf256c222014-01-25 21:30:37 -080086 return 1;
87 case LZMA_MEM_ERROR:
88 PyErr_NoMemory();
89 return 1;
90 case LZMA_MEMLIMIT_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +090091 PyErr_SetString(state->error, "Memory usage limit exceeded");
Larry Hastingsf256c222014-01-25 21:30:37 -080092 return 1;
93 case LZMA_FORMAT_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +090094 PyErr_SetString(state->error, "Input format not supported by decoder");
Larry Hastingsf256c222014-01-25 21:30:37 -080095 return 1;
96 case LZMA_OPTIONS_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +090097 PyErr_SetString(state->error, "Invalid or unsupported options");
Larry Hastingsf256c222014-01-25 21:30:37 -080098 return 1;
99 case LZMA_DATA_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900100 PyErr_SetString(state->error, "Corrupt input data");
Larry Hastingsf256c222014-01-25 21:30:37 -0800101 return 1;
102 case LZMA_BUF_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900103 PyErr_SetString(state->error, "Insufficient buffer space");
Larry Hastingsf256c222014-01-25 21:30:37 -0800104 return 1;
105 case LZMA_PROG_ERROR:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900106 PyErr_SetString(state->error, "Internal error");
Larry Hastingsf256c222014-01-25 21:30:37 -0800107 return 1;
108 default:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900109 PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
Larry Hastingsf256c222014-01-25 21:30:37 -0800110 return 1;
111 }
112}
113
114static void*
115PyLzma_Malloc(void *opaque, size_t items, size_t size)
116{
Dong-hee Na1937edd2020-06-23 00:53:07 +0900117 if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800118 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900119 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800120 /* PyMem_Malloc() cannot be used:
121 the GIL is not held when lzma_code() is called */
122 return PyMem_RawMalloc(items * size);
123}
124
/* lzma_allocator.free callback: release memory with PyMem_RawFree().
   `opaque` is unused. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    (void)opaque;
    PyMem_RawFree(ptr);
}
130
/* Initial size of output buffers: BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define INITIAL_BUFFER_SIZE BUFSIZ
#else
#define INITIAL_BUFFER_SIZE 8192
#endif
136
137static int
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100138grow_buffer(PyObject **buf, Py_ssize_t max_length)
Larry Hastingsf256c222014-01-25 21:30:37 -0800139{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100140 Py_ssize_t size = PyBytes_GET_SIZE(*buf);
141 Py_ssize_t newsize = size + (size >> 3) + 6;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200142
Dong-hee Na1937edd2020-06-23 00:53:07 +0900143 if (max_length > 0 && newsize > max_length) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100144 newsize = max_length;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900145 }
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200146
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100147 return _PyBytes_Resize(buf, newsize);
Larry Hastingsf256c222014-01-25 21:30:37 -0800148}
149
150
/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
   since the predefined conversion specifiers do not suit our needs:

      uint32_t - the "I" (unsigned int) specifier is the right size, but
      silently ignores overflows on conversion.

      lzma_vli - the "K" (unsigned long long) specifier is the right
      size, but like "I" it silently ignores overflows on conversion.

      lzma_mode and lzma_match_finder - these are enumeration types, and
      so the size of each is implementation-defined. Worse, different
      enum types can be of different sizes within the same program, so
      to be strictly correct, we need to define two separate converters.
 */
165
166#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
167 static int \
168 FUNCNAME(PyObject *obj, void *ptr) \
169 { \
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700170 unsigned long long val; \
Larry Hastingsf256c222014-01-25 21:30:37 -0800171 \
172 val = PyLong_AsUnsignedLongLong(obj); \
173 if (PyErr_Occurred()) \
174 return 0; \
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700175 if ((unsigned long long)(TYPE)val != val) { \
Larry Hastingsf256c222014-01-25 21:30:37 -0800176 PyErr_SetString(PyExc_OverflowError, \
177 "Value too large for " #TYPE " type"); \
178 return 0; \
179 } \
180 *(TYPE *)ptr = (TYPE)val; \
181 return 1; \
182 }
183
184INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
185INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
186INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
187INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
188
189#undef INT_TYPE_CONVERTER_FUNC
190
191
192/* Filter specifier parsing.
193
194 This code handles converting filter specifiers (Python dicts) into
195 the C lzma_filter structs expected by liblzma. */
196
197static void *
Dong-hee Na1937edd2020-06-23 00:53:07 +0900198parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
Larry Hastingsf256c222014-01-25 21:30:37 -0800199{
200 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
201 "pb", "mode", "nice_len", "mf", "depth", NULL};
202 PyObject *id;
203 PyObject *preset_obj;
204 uint32_t preset = LZMA_PRESET_DEFAULT;
205 lzma_options_lzma *options;
206
207 /* First, fill in default values for all the options using a preset.
208 Then, override the defaults with any values given by the caller. */
209
210 preset_obj = PyMapping_GetItemString(spec, "preset");
211 if (preset_obj == NULL) {
Dong-hee Na1937edd2020-06-23 00:53:07 +0900212 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800213 PyErr_Clear();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900214 }
215 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800216 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900217 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800218 } else {
219 int ok = uint32_converter(preset_obj, &preset);
220 Py_DECREF(preset_obj);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900221 if (!ok) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800222 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900223 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800224 }
225
Andy Lester7668a8b2020-03-24 23:26:44 -0500226 options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900227 if (options == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800228 return PyErr_NoMemory();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900229 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800230
231 if (lzma_lzma_preset(options, preset)) {
232 PyMem_Free(options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900233 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
Larry Hastingsf256c222014-01-25 21:30:37 -0800234 return NULL;
235 }
236
Dong-hee Na1937edd2020-06-23 00:53:07 +0900237 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
Larry Hastingsf256c222014-01-25 21:30:37 -0800238 "|OOO&O&O&O&O&O&O&O&", optnames,
239 &id, &preset_obj,
240 uint32_converter, &options->dict_size,
241 uint32_converter, &options->lc,
242 uint32_converter, &options->lp,
243 uint32_converter, &options->pb,
244 lzma_mode_converter, &options->mode,
245 uint32_converter, &options->nice_len,
246 lzma_mf_converter, &options->mf,
247 uint32_converter, &options->depth)) {
248 PyErr_SetString(PyExc_ValueError,
249 "Invalid filter specifier for LZMA filter");
250 PyMem_Free(options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900251 return NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -0800252 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900253
Larry Hastingsf256c222014-01-25 21:30:37 -0800254 return options;
255}
256
257static void *
Dong-hee Na1937edd2020-06-23 00:53:07 +0900258parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
Larry Hastingsf256c222014-01-25 21:30:37 -0800259{
260 static char *optnames[] = {"id", "dist", NULL};
261 PyObject *id;
262 uint32_t dist = 1;
263 lzma_options_delta *options;
264
Dong-hee Na1937edd2020-06-23 00:53:07 +0900265 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
Larry Hastingsf256c222014-01-25 21:30:37 -0800266 &id, uint32_converter, &dist)) {
267 PyErr_SetString(PyExc_ValueError,
268 "Invalid filter specifier for delta filter");
269 return NULL;
270 }
271
Andy Lester7668a8b2020-03-24 23:26:44 -0500272 options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900273 if (options == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800274 return PyErr_NoMemory();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900275 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800276 options->type = LZMA_DELTA_TYPE_BYTE;
277 options->dist = dist;
278 return options;
279}
280
281static void *
Dong-hee Na1937edd2020-06-23 00:53:07 +0900282parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
Larry Hastingsf256c222014-01-25 21:30:37 -0800283{
284 static char *optnames[] = {"id", "start_offset", NULL};
285 PyObject *id;
286 uint32_t start_offset = 0;
287 lzma_options_bcj *options;
288
Dong-hee Na1937edd2020-06-23 00:53:07 +0900289 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
Larry Hastingsf256c222014-01-25 21:30:37 -0800290 &id, uint32_converter, &start_offset)) {
291 PyErr_SetString(PyExc_ValueError,
292 "Invalid filter specifier for BCJ filter");
293 return NULL;
294 }
295
Andy Lester7668a8b2020-03-24 23:26:44 -0500296 options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900297 if (options == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800298 return PyErr_NoMemory();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900299 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800300 options->start_offset = start_offset;
301 return options;
302}
303
304static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900305lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
Larry Hastingsf256c222014-01-25 21:30:37 -0800306{
307 lzma_filter *f = (lzma_filter *)ptr;
308 PyObject *id_obj;
309
310 if (!PyMapping_Check(spec)) {
311 PyErr_SetString(PyExc_TypeError,
312 "Filter specifier must be a dict or dict-like object");
313 return 0;
314 }
315 id_obj = PyMapping_GetItemString(spec, "id");
316 if (id_obj == NULL) {
317 if (PyErr_ExceptionMatches(PyExc_KeyError))
318 PyErr_SetString(PyExc_ValueError,
319 "Filter specifier must have an \"id\" entry");
320 return 0;
321 }
322 f->id = PyLong_AsUnsignedLongLong(id_obj);
323 Py_DECREF(id_obj);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900324 if (PyErr_Occurred()) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800325 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900326 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800327
328 switch (f->id) {
329 case LZMA_FILTER_LZMA1:
330 case LZMA_FILTER_LZMA2:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900331 f->options = parse_filter_spec_lzma(state, spec);
Larry Hastingsf256c222014-01-25 21:30:37 -0800332 return f->options != NULL;
333 case LZMA_FILTER_DELTA:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900334 f->options = parse_filter_spec_delta(state, spec);
Larry Hastingsf256c222014-01-25 21:30:37 -0800335 return f->options != NULL;
336 case LZMA_FILTER_X86:
337 case LZMA_FILTER_POWERPC:
338 case LZMA_FILTER_IA64:
339 case LZMA_FILTER_ARM:
340 case LZMA_FILTER_ARMTHUMB:
341 case LZMA_FILTER_SPARC:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900342 f->options = parse_filter_spec_bcj(state, spec);
Larry Hastingsf256c222014-01-25 21:30:37 -0800343 return f->options != NULL;
344 default:
345 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
346 return 0;
347 }
348}
349
350static void
351free_filter_chain(lzma_filter filters[])
352{
Dong-hee Na1937edd2020-06-23 00:53:07 +0900353 for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800354 PyMem_Free(filters[i].options);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900355 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800356}
357
358static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900359parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -0800360{
361 Py_ssize_t i, num_filters;
362
363 num_filters = PySequence_Length(filterspecs);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900364 if (num_filters == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800365 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900366 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800367 if (num_filters > LZMA_FILTERS_MAX) {
368 PyErr_Format(PyExc_ValueError,
369 "Too many filters - liblzma supports a maximum of %d",
370 LZMA_FILTERS_MAX);
371 return -1;
372 }
373
374 for (i = 0; i < num_filters; i++) {
375 int ok = 1;
376 PyObject *spec = PySequence_GetItem(filterspecs, i);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900377 if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800378 ok = 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900379 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800380 Py_XDECREF(spec);
381 if (!ok) {
382 filters[i].id = LZMA_VLI_UNKNOWN;
383 free_filter_chain(filters);
384 return -1;
385 }
386 }
387 filters[num_filters].id = LZMA_VLI_UNKNOWN;
388 return 0;
389}
390
391
392/* Filter specifier construction.
393
394 This code handles converting C lzma_filter structs into
395 Python-level filter specifiers (represented as dicts). */
396
397static int
Benjamin Petersonaf580df2016-09-06 10:46:49 -0700398spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned long long value)
Larry Hastingsf256c222014-01-25 21:30:37 -0800399{
400 int status;
401 PyObject *value_object;
402
403 value_object = PyLong_FromUnsignedLongLong(value);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900404 if (value_object == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800405 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900406 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800407
408 status = _PyDict_SetItemId(spec, key, value_object);
409 Py_DECREF(value_object);
410 return status;
411}
412
413static PyObject *
414build_filter_spec(const lzma_filter *f)
415{
416 PyObject *spec;
417
418 spec = PyDict_New();
Dong-hee Na1937edd2020-06-23 00:53:07 +0900419 if (spec == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800420 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900421 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800422
423#define ADD_FIELD(SOURCE, FIELD) \
424 do { \
425 _Py_IDENTIFIER(FIELD); \
426 if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
427 goto error;\
428 } while (0)
429
430 ADD_FIELD(f, id);
431
432 switch (f->id) {
433 /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
434 lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
435 dict_size field is used. */
436 case LZMA_FILTER_LZMA1: {
437 lzma_options_lzma *options = f->options;
438 ADD_FIELD(options, lc);
439 ADD_FIELD(options, lp);
440 ADD_FIELD(options, pb);
441 ADD_FIELD(options, dict_size);
442 break;
443 }
444 case LZMA_FILTER_LZMA2: {
445 lzma_options_lzma *options = f->options;
446 ADD_FIELD(options, dict_size);
447 break;
448 }
449 case LZMA_FILTER_DELTA: {
450 lzma_options_delta *options = f->options;
451 ADD_FIELD(options, dist);
452 break;
453 }
454 case LZMA_FILTER_X86:
455 case LZMA_FILTER_POWERPC:
456 case LZMA_FILTER_IA64:
457 case LZMA_FILTER_ARM:
458 case LZMA_FILTER_ARMTHUMB:
459 case LZMA_FILTER_SPARC: {
460 lzma_options_bcj *options = f->options;
461 ADD_FIELD(options, start_offset);
462 break;
463 }
464 default:
465 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
466 goto error;
467 }
468
469#undef ADD_FIELD
470
471 return spec;
472
473error:
474 Py_DECREF(spec);
475 return NULL;
476}
477
478
479/*[clinic input]
Larry Hastingsf256c222014-01-25 21:30:37 -0800480module _lzma
481class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
482class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
483[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300484/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800485
486#include "clinic/_lzmamodule.c.h"
487
488/*[python input]
489
490class lzma_vli_converter(CConverter):
491 type = 'lzma_vli'
492 converter = 'lzma_vli_converter'
493
494class lzma_filter_converter(CConverter):
495 type = 'lzma_filter'
496 converter = 'lzma_filter_converter'
497 c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
498
499 def cleanup(self):
500 name = ensure_legal_c_identifier(self.name)
501 return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
502 ' PyMem_Free(%(name)s.options);\n') % {'name': name}
503
504[python start generated code]*/
Larry Hastings581ee362014-01-28 05:00:08 -0800505/*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800506
507
508/* LZMACompressor class. */
509
510static PyObject *
511compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
512{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100513 Py_ssize_t data_size = 0;
Larry Hastingsf256c222014-01-25 21:30:37 -0800514 PyObject *result;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900515 _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
516 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -0800517
518 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900519 if (result == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800520 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900521 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800522 c->lzs.next_in = data;
523 c->lzs.avail_in = len;
524 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
525 c->lzs.avail_out = PyBytes_GET_SIZE(result);
526 for (;;) {
527 lzma_ret lzret;
528
529 Py_BEGIN_ALLOW_THREADS
530 lzret = lzma_code(&c->lzs, action);
531 data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900532 if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
Serhiy Storchaka04f17f12016-10-31 08:30:09 +0200533 lzret = LZMA_OK; /* That wasn't a real error */
Dong-hee Na1937edd2020-06-23 00:53:07 +0900534 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800535 Py_END_ALLOW_THREADS
Dong-hee Na1937edd2020-06-23 00:53:07 +0900536 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800537 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900538 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800539 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
540 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
541 break;
542 } else if (c->lzs.avail_out == 0) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100543 if (grow_buffer(&result, -1) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800544 goto error;
545 c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
546 c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
547 }
548 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100549 if (data_size != PyBytes_GET_SIZE(result))
Dong-hee Na1937edd2020-06-23 00:53:07 +0900550 if (_PyBytes_Resize(&result, data_size) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800551 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900552 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800553 return result;
554
555error:
556 Py_XDECREF(result);
557 return NULL;
558}
559
560/*[clinic input]
561_lzma.LZMACompressor.compress
562
Larry Hastingsf256c222014-01-25 21:30:37 -0800563 data: Py_buffer
564 /
565
566Provide data to the compressor object.
567
568Returns a chunk of compressed data if possible, or b'' otherwise.
569
570When you have finished providing data to the compressor, call the
571flush() method to finish the compression process.
572[clinic start generated code]*/
573
574static PyObject *
575_lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300576/*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800577{
578 PyObject *result = NULL;
579
580 ACQUIRE_LOCK(self);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900581 if (self->flushed) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800582 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
Dong-hee Na1937edd2020-06-23 00:53:07 +0900583 }
584 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800585 result = compress(self, data->buf, data->len, LZMA_RUN);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900586 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800587 RELEASE_LOCK(self);
588 return result;
589}
590
591/*[clinic input]
592_lzma.LZMACompressor.flush
593
Larry Hastingsf256c222014-01-25 21:30:37 -0800594Finish the compression process.
595
596Returns the compressed data left in internal buffers.
597
598The compressor object may not be used after this method is called.
599[clinic start generated code]*/
600
601static PyObject *
602_lzma_LZMACompressor_flush_impl(Compressor *self)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +0300603/*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
Larry Hastingsf256c222014-01-25 21:30:37 -0800604{
605 PyObject *result = NULL;
606
607 ACQUIRE_LOCK(self);
608 if (self->flushed) {
609 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
610 } else {
611 self->flushed = 1;
612 result = compress(self, NULL, 0, LZMA_FINISH);
613 }
614 RELEASE_LOCK(self);
615 return result;
616}
617
Larry Hastingsf256c222014-01-25 21:30:37 -0800618static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900619Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
620 int check, uint32_t preset, PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -0800621{
622 lzma_ret lzret;
623
624 if (filterspecs == Py_None) {
625 lzret = lzma_easy_encoder(lzs, preset, check);
626 } else {
627 lzma_filter filters[LZMA_FILTERS_MAX + 1];
628
Dong-hee Na1937edd2020-06-23 00:53:07 +0900629 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800630 return -1;
631 lzret = lzma_stream_encoder(lzs, filters, check);
632 free_filter_chain(filters);
633 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900634 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800635 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900636 }
637 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800638 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900639 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800640}
641
642static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900643Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -0800644{
645 lzma_ret lzret;
646
647 if (filterspecs == Py_None) {
648 lzma_options_lzma options;
649
650 if (lzma_lzma_preset(&options, preset)) {
Dong-hee Na1937edd2020-06-23 00:53:07 +0900651 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
Larry Hastingsf256c222014-01-25 21:30:37 -0800652 return -1;
653 }
654 lzret = lzma_alone_encoder(lzs, &options);
655 } else {
656 lzma_filter filters[LZMA_FILTERS_MAX + 1];
657
Dong-hee Na1937edd2020-06-23 00:53:07 +0900658 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
Larry Hastingsf256c222014-01-25 21:30:37 -0800659 return -1;
660 if (filters[0].id == LZMA_FILTER_LZMA1 &&
661 filters[1].id == LZMA_VLI_UNKNOWN) {
662 lzret = lzma_alone_encoder(lzs, filters[0].options);
663 } else {
664 PyErr_SetString(PyExc_ValueError,
665 "Invalid filter chain for FORMAT_ALONE - "
666 "must be a single LZMA1 filter");
667 lzret = LZMA_PROG_ERROR;
668 }
669 free_filter_chain(filters);
670 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900671 if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800672 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900673 }
674 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800675 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900676 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800677}
678
679static int
Dong-hee Na1937edd2020-06-23 00:53:07 +0900680Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -0800681{
682 lzma_filter filters[LZMA_FILTERS_MAX + 1];
683 lzma_ret lzret;
684
685 if (filterspecs == Py_None) {
686 PyErr_SetString(PyExc_ValueError,
687 "Must specify filters for FORMAT_RAW");
688 return -1;
689 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900690 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800691 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900692 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800693 lzret = lzma_raw_encoder(lzs, filters);
694 free_filter_chain(filters);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900695 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800696 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900697 }
698 else {
Larry Hastingsf256c222014-01-25 21:30:37 -0800699 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900700 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800701}
702
703/*[-clinic input]
704_lzma.LZMACompressor.__init__
705
Larry Hastingsf256c222014-01-25 21:30:37 -0800706 format: int(c_default="FORMAT_XZ") = FORMAT_XZ
707 The container format to use for the output. This can
708 be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
709
710 check: int(c_default="-1") = unspecified
711 The integrity check to use. For FORMAT_XZ, the default
Martin Pantere26da7c2016-06-02 10:07:09 +0000712 is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity
Larry Hastingsf256c222014-01-25 21:30:37 -0800713 checks; for these formats, check must be omitted, or be CHECK_NONE.
714
715 preset: object = None
716 If provided should be an integer in the range 0-9, optionally
717 OR-ed with the constant PRESET_EXTREME.
718
719 filters: object = None
720 If provided should be a sequence of dicts. Each dict should
721 have an entry for "id" indicating the ID of the filter, plus
722 additional entries for options to the filter.
723
724Create a compressor object for compressing data incrementally.
725
726The settings used by the compressor can be specified either as a
727preset compression level (with the 'preset' argument), or in detail
728as a custom filter chain (with the 'filters' argument). For FORMAT_XZ
729and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
730level. For FORMAT_RAW, the caller must always specify a filter chain;
731the raw compressor does not support preset compression levels.
732
733For one-shot compression, use the compress() function instead.
734[-clinic start generated code]*/
735static int
736Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
737{
738 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
739 int format = FORMAT_XZ;
740 int check = -1;
741 uint32_t preset = LZMA_PRESET_DEFAULT;
742 PyObject *preset_obj = Py_None;
743 PyObject *filterspecs = Py_None;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900744 _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
745 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -0800746 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
747 "|iiOO:LZMACompressor", arg_names,
748 &format, &check, &preset_obj,
Dong-hee Na1937edd2020-06-23 00:53:07 +0900749 &filterspecs)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800750 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900751 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800752
753 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
754 PyErr_SetString(PyExc_ValueError,
755 "Integrity checks are only supported by FORMAT_XZ");
756 return -1;
757 }
758
759 if (preset_obj != Py_None && filterspecs != Py_None) {
760 PyErr_SetString(PyExc_ValueError,
761 "Cannot specify both preset and filter chain");
762 return -1;
763 }
764
Dong-hee Na1937edd2020-06-23 00:53:07 +0900765 if (preset_obj != Py_None) {
766 if (!uint32_converter(preset_obj, &preset)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800767 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900768 }
769 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800770
771 self->alloc.opaque = NULL;
772 self->alloc.alloc = PyLzma_Malloc;
773 self->alloc.free = PyLzma_Free;
774 self->lzs.allocator = &self->alloc;
775
Larry Hastingsf256c222014-01-25 21:30:37 -0800776 self->lock = PyThread_allocate_lock();
777 if (self->lock == NULL) {
778 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
779 return -1;
780 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800781
782 self->flushed = 0;
783 switch (format) {
784 case FORMAT_XZ:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900785 if (check == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800786 check = LZMA_CHECK_CRC64;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900787 }
788 if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800789 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900790 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800791 return 0;
792
793 case FORMAT_ALONE:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900794 if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800795 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900796 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800797 return 0;
798
799 case FORMAT_RAW:
Dong-hee Na1937edd2020-06-23 00:53:07 +0900800 if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800801 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900802 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800803 return 0;
804
805 default:
806 PyErr_Format(PyExc_ValueError,
807 "Invalid container format: %d", format);
808 break;
809 }
810
Larry Hastingsf256c222014-01-25 21:30:37 -0800811 PyThread_free_lock(self->lock);
812 self->lock = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -0800813 return -1;
814}
815
816static void
817Compressor_dealloc(Compressor *self)
818{
819 lzma_end(&self->lzs);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900820 if (self->lock != NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800821 PyThread_free_lock(self->lock);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900822 }
823 PyTypeObject *tp = Py_TYPE(self);
824 tp->tp_free((PyObject *)self);
825 Py_DECREF(tp);
826}
827
Larry Hastingsf256c222014-01-25 21:30:37 -0800828static PyMethodDef Compressor_methods[] = {
829 _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
830 _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
Larry Hastingsf256c222014-01-25 21:30:37 -0800831 {NULL}
832};
833
Dong-hee Na1937edd2020-06-23 00:53:07 +0900834static int
835Compressor_traverse(Compressor *self, visitproc visit, void *arg)
836{
837 Py_VISIT(Py_TYPE(self));
838 return 0;
839}
840
Larry Hastingsf256c222014-01-25 21:30:37 -0800841PyDoc_STRVAR(Compressor_doc,
842"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
843"\n"
844"Create a compressor object for compressing data incrementally.\n"
845"\n"
846"format specifies the container format to use for the output. This can\n"
847"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
848"\n"
849"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
Ville Skyttä49b27342017-08-03 09:00:59 +0300850"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
Larry Hastingsf256c222014-01-25 21:30:37 -0800851"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
852"\n"
853"The settings used by the compressor can be specified either as a\n"
854"preset compression level (with the 'preset' argument), or in detail\n"
855"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
856"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
857"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
858"the raw compressor does not support preset compression levels.\n"
859"\n"
860"preset (if provided) should be an integer in the range 0-9, optionally\n"
861"OR-ed with the constant PRESET_EXTREME.\n"
862"\n"
863"filters (if provided) should be a sequence of dicts. Each dict should\n"
864"have an entry for \"id\" indicating the ID of the filter, plus\n"
865"additional entries for options to the filter.\n"
866"\n"
867"For one-shot compression, use the compress() function instead.\n");
868
Dong-hee Na1937edd2020-06-23 00:53:07 +0900869static PyType_Slot lzma_compressor_type_slots[] = {
870 {Py_tp_dealloc, Compressor_dealloc},
871 {Py_tp_methods, Compressor_methods},
872 {Py_tp_init, Compressor_init},
873 {Py_tp_new, PyType_GenericNew},
874 {Py_tp_doc, (char *)Compressor_doc},
875 {Py_tp_traverse, Compressor_traverse},
876 {0, 0}
Larry Hastingsf256c222014-01-25 21:30:37 -0800877};
878
Dong-hee Na1937edd2020-06-23 00:53:07 +0900879static PyType_Spec lzma_compressor_type_spec = {
880 .name = "_lzma.LZMACompressor",
881 .basicsize = sizeof(Compressor),
882 // Calling PyType_GetModuleState() on a subclass is not safe.
883 // lzma_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
884 // which prevents to create a subclass.
885 // So calling PyType_GetModuleState() in this file is always safe.
886 .flags = Py_TPFLAGS_DEFAULT,
887 .slots = lzma_compressor_type_slots,
888};
Larry Hastingsf256c222014-01-25 21:30:37 -0800889
890/* LZMADecompressor class. */
891
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100892/* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
893 buffer is allocated dynamically and returned. At most max_length bytes are
894 returned, so some of the input may not be consumed. d->lzs.next_in and
895 d->lzs.avail_in are updated to reflect the consumed input. */
896static PyObject*
897decompress_buf(Decompressor *d, Py_ssize_t max_length)
Larry Hastingsf256c222014-01-25 21:30:37 -0800898{
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100899 Py_ssize_t data_size = 0;
Larry Hastingsf256c222014-01-25 21:30:37 -0800900 PyObject *result;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100901 lzma_stream *lzs = &d->lzs;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900902 _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
903 assert(state != NULL);
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200904
Dong-hee Na1937edd2020-06-23 00:53:07 +0900905 if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100906 result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900907 }
908 else {
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100909 result = PyBytes_FromStringAndSize(NULL, max_length);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900910 }
911 if (result == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800912 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900913 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100914
915 lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result);
916 lzs->avail_out = PyBytes_GET_SIZE(result);
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200917
Larry Hastingsf256c222014-01-25 21:30:37 -0800918 for (;;) {
919 lzma_ret lzret;
920
921 Py_BEGIN_ALLOW_THREADS
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100922 lzret = lzma_code(lzs, LZMA_RUN);
923 data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900924 if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
animalize4ffd05d2019-09-12 22:20:37 +0800925 lzret = LZMA_OK; /* That wasn't a real error */
Dong-hee Na1937edd2020-06-23 00:53:07 +0900926 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800927 Py_END_ALLOW_THREADS
animalize4ffd05d2019-09-12 22:20:37 +0800928
Dong-hee Na1937edd2020-06-23 00:53:07 +0900929 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800930 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900931 }
932 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800933 d->check = lzma_get_check(&d->lzs);
Dong-hee Na1937edd2020-06-23 00:53:07 +0900934 }
Larry Hastingsf256c222014-01-25 21:30:37 -0800935 if (lzret == LZMA_STREAM_END) {
936 d->eof = 1;
Larry Hastingsf256c222014-01-25 21:30:37 -0800937 break;
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100938 } else if (lzs->avail_out == 0) {
animalize4ffd05d2019-09-12 22:20:37 +0800939 /* Need to check lzs->avail_out before lzs->avail_in.
940 Maybe lzs's internal state still have a few bytes
941 can be output, grow the output buffer and continue
942 if max_lengh < 0. */
Dong-hee Na1937edd2020-06-23 00:53:07 +0900943 if (data_size == max_length) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100944 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900945 }
946 if (grow_buffer(&result, max_length) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800947 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900948 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100949 lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
950 lzs->avail_out = PyBytes_GET_SIZE(result) - data_size;
animalize4ffd05d2019-09-12 22:20:37 +0800951 } else if (lzs->avail_in == 0) {
952 break;
Larry Hastingsf256c222014-01-25 21:30:37 -0800953 }
954 }
Dong-hee Na1937edd2020-06-23 00:53:07 +0900955 if (data_size != PyBytes_GET_SIZE(result)) {
956 if (_PyBytes_Resize(&result, data_size) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -0800957 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +0900958 }
959 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100960
961 return result;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200962
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100963error:
964 Py_XDECREF(result);
965 return NULL;
966}
967
968static PyObject *
969decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
970{
971 char input_buffer_in_use;
972 PyObject *result;
973 lzma_stream *lzs = &d->lzs;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200974
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100975 /* Prepend unconsumed input if necessary */
976 if (lzs->next_in != NULL) {
977 size_t avail_now, avail_total;
Serhiy Storchaka009b8112015-03-18 21:53:15 +0200978
Antoine Pitrou26795ba2015-01-17 16:22:18 +0100979 /* Number of bytes we can append to input buffer */
980 avail_now = (d->input_buffer + d->input_buffer_size)
981 - (lzs->next_in + lzs->avail_in);
982
983 /* Number of bytes we can append if we move existing
984 contents to beginning of buffer (overwriting
985 consumed input) */
986 avail_total = d->input_buffer_size - lzs->avail_in;
987
988 if (avail_total < len) {
989 size_t offset = lzs->next_in - d->input_buffer;
990 uint8_t *tmp;
991 size_t new_size = d->input_buffer_size + len - avail_now;
992
993 /* Assign to temporary variable first, so we don't
994 lose address of allocated buffer if realloc fails */
995 tmp = PyMem_Realloc(d->input_buffer, new_size);
996 if (tmp == NULL) {
997 PyErr_SetNone(PyExc_MemoryError);
998 return NULL;
999 }
1000 d->input_buffer = tmp;
1001 d->input_buffer_size = new_size;
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001002
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001003 lzs->next_in = d->input_buffer + offset;
1004 }
1005 else if (avail_now < len) {
1006 memmove(d->input_buffer, lzs->next_in,
1007 lzs->avail_in);
1008 lzs->next_in = d->input_buffer;
1009 }
1010 memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1011 lzs->avail_in += len;
1012 input_buffer_in_use = 1;
1013 }
1014 else {
1015 lzs->next_in = data;
1016 lzs->avail_in = len;
1017 input_buffer_in_use = 0;
1018 }
1019
1020 result = decompress_buf(d, max_length);
Serhiy Storchakac0b70372016-09-27 20:14:26 +03001021 if (result == NULL) {
1022 lzs->next_in = NULL;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001023 return NULL;
Serhiy Storchakac0b70372016-09-27 20:14:26 +03001024 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001025
1026 if (d->eof) {
1027 d->needs_input = 0;
1028 if (lzs->avail_in > 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03001029 Py_XSETREF(d->unused_data,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +02001030 PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
Dong-hee Na1937edd2020-06-23 00:53:07 +09001031 if (d->unused_data == NULL) {
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001032 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001033 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001034 }
1035 }
1036 else if (lzs->avail_in == 0) {
1037 lzs->next_in = NULL;
animalize4ffd05d2019-09-12 22:20:37 +08001038
1039 if (lzs->avail_out == 0) {
1040 /* (avail_in==0 && avail_out==0)
1041 Maybe lzs's internal state still have a few bytes can
1042 be output, try to output them next time. */
1043 d->needs_input = 0;
1044
1045 /* if max_length < 0, lzs->avail_out always > 0 */
1046 assert(max_length >= 0);
1047 } else {
1048 /* Input buffer exhausted, output buffer has space. */
1049 d->needs_input = 1;
1050 }
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001051 }
1052 else {
1053 d->needs_input = 0;
1054
1055 /* If we did not use the input buffer, we now have
1056 to copy the tail from the caller's buffer into the
1057 input buffer */
1058 if (!input_buffer_in_use) {
1059
1060 /* Discard buffer if it's too small
1061 (resizing it may needlessly copy the current contents) */
1062 if (d->input_buffer != NULL &&
1063 d->input_buffer_size < lzs->avail_in) {
1064 PyMem_Free(d->input_buffer);
1065 d->input_buffer = NULL;
1066 }
1067
1068 /* Allocate if necessary */
1069 if (d->input_buffer == NULL) {
1070 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1071 if (d->input_buffer == NULL) {
1072 PyErr_SetNone(PyExc_MemoryError);
1073 goto error;
1074 }
1075 d->input_buffer_size = lzs->avail_in;
1076 }
1077
1078 /* Copy tail */
1079 memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1080 lzs->next_in = d->input_buffer;
1081 }
1082 }
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001083
Larry Hastingsf256c222014-01-25 21:30:37 -08001084 return result;
1085
1086error:
1087 Py_XDECREF(result);
1088 return NULL;
1089}
1090
1091/*[clinic input]
1092_lzma.LZMADecompressor.decompress
1093
Larry Hastingsf256c222014-01-25 21:30:37 -08001094 data: Py_buffer
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001095 max_length: Py_ssize_t=-1
Larry Hastingsf256c222014-01-25 21:30:37 -08001096
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001097Decompress *data*, returning uncompressed data as bytes.
Larry Hastingsf256c222014-01-25 21:30:37 -08001098
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001099If *max_length* is nonnegative, returns at most *max_length* bytes of
1100decompressed data. If this limit is reached and further output can be
1101produced, *self.needs_input* will be set to ``False``. In this case, the next
1102call to *decompress()* may provide *data* as b'' to obtain more of the output.
Larry Hastingsf256c222014-01-25 21:30:37 -08001103
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001104If all of the input data was decompressed and returned (either because this
1105was less than *max_length* bytes, or because *max_length* was negative),
1106*self.needs_input* will be set to True.
1107
1108Attempting to decompress data after the end of stream is reached raises an
1109EOFError. Any data found after the end of the stream is ignored and saved in
1110the unused_data attribute.
Larry Hastingsf256c222014-01-25 21:30:37 -08001111[clinic start generated code]*/
1112
1113static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001114_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1115 Py_ssize_t max_length)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001116/*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001117{
1118 PyObject *result = NULL;
1119
1120 ACQUIRE_LOCK(self);
1121 if (self->eof)
1122 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1123 else
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001124 result = decompress(self, data->buf, data->len, max_length);
Larry Hastingsf256c222014-01-25 21:30:37 -08001125 RELEASE_LOCK(self);
1126 return result;
1127}
1128
Larry Hastingsf256c222014-01-25 21:30:37 -08001129static int
Dong-hee Na1937edd2020-06-23 00:53:07 +09001130Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
Larry Hastingsf256c222014-01-25 21:30:37 -08001131{
1132 lzma_filter filters[LZMA_FILTERS_MAX + 1];
1133 lzma_ret lzret;
1134
Dong-hee Na1937edd2020-06-23 00:53:07 +09001135 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001136 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001137 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001138 lzret = lzma_raw_decoder(lzs, filters);
1139 free_filter_chain(filters);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001140 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001141 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001142 }
1143 else {
Larry Hastingsf256c222014-01-25 21:30:37 -08001144 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001145 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001146}
1147
1148/*[clinic input]
1149_lzma.LZMADecompressor.__init__
1150
Larry Hastingsf256c222014-01-25 21:30:37 -08001151 format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1152 Specifies the container format of the input stream. If this is
1153 FORMAT_AUTO (the default), the decompressor will automatically detect
1154 whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with
1155 FORMAT_RAW cannot be autodetected.
1156
1157 memlimit: object = None
1158 Limit the amount of memory used by the decompressor. This will cause
1159 decompression to fail if the input cannot be decompressed within the
1160 given limit.
1161
1162 filters: object = None
1163 A custom filter chain. This argument is required for FORMAT_RAW, and
1164 not accepted with any other format. When provided, this should be a
1165 sequence of dicts, each indicating the ID and options for a single
1166 filter.
1167
1168Create a decompressor object for decompressing data incrementally.
1169
1170For one-shot decompression, use the decompress() function instead.
1171[clinic start generated code]*/
1172
1173static int
Larry Hastings89964c42015-04-14 18:07:59 -04001174_lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
1175 PyObject *memlimit, PyObject *filters)
Serhiy Storchaka7a9579c2016-05-02 13:45:20 +03001176/*[clinic end generated code: output=3e1821f8aa36564c input=81fe684a6c2f8a27]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001177{
1178 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1179 uint64_t memlimit_ = UINT64_MAX;
1180 lzma_ret lzret;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001181 _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
1182 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -08001183
1184 if (memlimit != Py_None) {
1185 if (format == FORMAT_RAW) {
1186 PyErr_SetString(PyExc_ValueError,
1187 "Cannot specify memory limit with FORMAT_RAW");
1188 return -1;
1189 }
1190 memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001191 if (PyErr_Occurred()) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001192 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001193 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001194 }
1195
1196 if (format == FORMAT_RAW && filters == Py_None) {
1197 PyErr_SetString(PyExc_ValueError,
1198 "Must specify filters for FORMAT_RAW");
1199 return -1;
1200 } else if (format != FORMAT_RAW && filters != Py_None) {
1201 PyErr_SetString(PyExc_ValueError,
1202 "Cannot specify filters except with FORMAT_RAW");
1203 return -1;
1204 }
1205
1206 self->alloc.opaque = NULL;
1207 self->alloc.alloc = PyLzma_Malloc;
1208 self->alloc.free = PyLzma_Free;
1209 self->lzs.allocator = &self->alloc;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001210 self->lzs.next_in = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -08001211
Victor Stinner9b7cf752018-06-23 10:35:23 +02001212 PyThread_type_lock lock = PyThread_allocate_lock();
1213 if (lock == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001214 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1215 return -1;
1216 }
Victor Stinner9b7cf752018-06-23 10:35:23 +02001217 if (self->lock != NULL) {
1218 PyThread_free_lock(self->lock);
1219 }
1220 self->lock = lock;
Larry Hastingsf256c222014-01-25 21:30:37 -08001221
1222 self->check = LZMA_CHECK_UNKNOWN;
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001223 self->needs_input = 1;
1224 self->input_buffer = NULL;
1225 self->input_buffer_size = 0;
Oren Milmand019bc82018-02-13 12:28:33 +02001226 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
Dong-hee Na1937edd2020-06-23 00:53:07 +09001227 if (self->unused_data == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001228 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001229 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001230
1231 switch (format) {
1232 case FORMAT_AUTO:
1233 lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001234 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001235 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001236 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001237 return 0;
1238
1239 case FORMAT_XZ:
1240 lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001241 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001242 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001243 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001244 return 0;
1245
1246 case FORMAT_ALONE:
1247 self->check = LZMA_CHECK_NONE;
1248 lzret = lzma_alone_decoder(&self->lzs, memlimit_);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001249 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001250 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001251 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001252 return 0;
1253
1254 case FORMAT_RAW:
1255 self->check = LZMA_CHECK_NONE;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001256 if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001257 break;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001258 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001259 return 0;
1260
1261 default:
1262 PyErr_Format(PyExc_ValueError,
1263 "Invalid container format: %d", format);
1264 break;
1265 }
1266
1267error:
1268 Py_CLEAR(self->unused_data);
Larry Hastingsf256c222014-01-25 21:30:37 -08001269 PyThread_free_lock(self->lock);
1270 self->lock = NULL;
Larry Hastingsf256c222014-01-25 21:30:37 -08001271 return -1;
1272}
1273
1274static void
1275Decompressor_dealloc(Decompressor *self)
1276{
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001277 if(self->input_buffer != NULL)
1278 PyMem_Free(self->input_buffer);
Serhiy Storchaka009b8112015-03-18 21:53:15 +02001279
Larry Hastingsf256c222014-01-25 21:30:37 -08001280 lzma_end(&self->lzs);
1281 Py_CLEAR(self->unused_data);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001282 if (self->lock != NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001283 PyThread_free_lock(self->lock);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001284 }
1285 PyTypeObject *tp = Py_TYPE(self);
1286 tp->tp_free((PyObject *)self);
1287 Py_DECREF(tp);
1288}
1289
1290static int
1291Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
1292{
1293 Py_VISIT(Py_TYPE(self));
1294 return 0;
1295}
1296
Larry Hastingsf256c222014-01-25 21:30:37 -08001297static PyMethodDef Decompressor_methods[] = {
1298 _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
Larry Hastingsf256c222014-01-25 21:30:37 -08001299 {NULL}
1300};
1301
1302PyDoc_STRVAR(Decompressor_check_doc,
1303"ID of the integrity check used by the input stream.");
1304
1305PyDoc_STRVAR(Decompressor_eof_doc,
1306"True if the end-of-stream marker has been reached.");
1307
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001308PyDoc_STRVAR(Decompressor_needs_input_doc,
1309"True if more input is needed before more decompressed data can be produced.");
1310
Larry Hastingsf256c222014-01-25 21:30:37 -08001311PyDoc_STRVAR(Decompressor_unused_data_doc,
1312"Data found after the end of the compressed stream.");
1313
1314static PyMemberDef Decompressor_members[] = {
1315 {"check", T_INT, offsetof(Decompressor, check), READONLY,
1316 Decompressor_check_doc},
1317 {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1318 Decompressor_eof_doc},
Antoine Pitrou26795ba2015-01-17 16:22:18 +01001319 {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
1320 Decompressor_needs_input_doc},
Larry Hastingsf256c222014-01-25 21:30:37 -08001321 {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1322 Decompressor_unused_data_doc},
1323 {NULL}
1324};
1325
Dong-hee Na1937edd2020-06-23 00:53:07 +09001326static PyType_Slot lzma_decompressor_type_slots[] = {
1327 {Py_tp_dealloc, Decompressor_dealloc},
1328 {Py_tp_methods, Decompressor_methods},
1329 {Py_tp_init, _lzma_LZMADecompressor___init__},
1330 {Py_tp_new, PyType_GenericNew},
1331 {Py_tp_doc, (char *)_lzma_LZMADecompressor___init____doc__},
1332 {Py_tp_traverse, Decompressor_traverse},
1333 {Py_tp_members, Decompressor_members},
1334 {0, 0}
1335};
1336
1337static PyType_Spec lzma_decompressor_type_spec = {
1338 .name = "_lzma.LZMADecompressor",
1339 .basicsize = sizeof(Decompressor),
1340 // Calling PyType_GetModuleState() on a subclass is not safe.
1341 // lzma_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
1342 // which prevents to create a subclass.
1343 // So calling PyType_GetModuleState() in this file is always safe.
1344 .flags = Py_TPFLAGS_DEFAULT,
1345 .slots = lzma_decompressor_type_slots,
Larry Hastingsf256c222014-01-25 21:30:37 -08001346};
1347
1348
1349/* Module-level functions. */
1350
1351/*[clinic input]
1352_lzma.is_check_supported
1353 check_id: int
1354 /
1355
1356Test whether the given integrity check is supported.
1357
1358Always returns True for CHECK_NONE and CHECK_CRC32.
1359[clinic start generated code]*/
1360
1361static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001362_lzma_is_check_supported_impl(PyObject *module, int check_id)
1363/*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001364{
1365 return PyBool_FromLong(lzma_check_is_supported(check_id));
1366}
1367
Dong-hee Na1937edd2020-06-23 00:53:07 +09001368PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
1369"_encode_filter_properties($module, filter, /)\n"
1370"--\n"
1371"\n"
1372"Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
1373"\n"
1374"The result does not include the filter ID itself, only the options.");
Larry Hastingsf256c222014-01-25 21:30:37 -08001375
Dong-hee Na1937edd2020-06-23 00:53:07 +09001376#define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF \
1377 {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},
Larry Hastingsf256c222014-01-25 21:30:37 -08001378
Dong-hee Na1937edd2020-06-23 00:53:07 +09001379static PyObject *
1380_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
Larry Hastingsf256c222014-01-25 21:30:37 -08001381
Dong-hee Na1937edd2020-06-23 00:53:07 +09001382static PyObject *
1383_lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1384{
1385 PyObject *return_value = NULL;
1386 lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1387 _lzma_state *state = get_lzma_state(module);
1388 assert(state != NULL);
1389 if (!lzma_filter_converter(state, arg, &filter)) {
1390 goto exit;
1391 }
1392 return_value = _lzma__encode_filter_properties_impl(module, filter);
1393
1394exit:
1395 /* Cleanup for filter */
1396 if (filter.id != LZMA_VLI_UNKNOWN) {
1397 PyMem_Free(filter.options);
1398 }
1399
1400 return return_value;
1401}
Larry Hastingsf256c222014-01-25 21:30:37 -08001402
1403static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001404_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
Larry Hastingsf256c222014-01-25 21:30:37 -08001405{
1406 lzma_ret lzret;
1407 uint32_t encoded_size;
1408 PyObject *result = NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001409 _lzma_state *state = get_lzma_state(module);
1410 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -08001411
1412 lzret = lzma_properties_size(&encoded_size, &filter);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001413 if (catch_lzma_error(state, lzret))
Larry Hastingsf256c222014-01-25 21:30:37 -08001414 goto error;
1415
1416 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1417 if (result == NULL)
1418 goto error;
1419
1420 lzret = lzma_properties_encode(
1421 &filter, (uint8_t *)PyBytes_AS_STRING(result));
Dong-hee Na1937edd2020-06-23 00:53:07 +09001422 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001423 goto error;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001424 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001425
1426 return result;
1427
1428error:
1429 Py_XDECREF(result);
1430 return NULL;
1431}
1432
1433
1434/*[clinic input]
1435_lzma._decode_filter_properties
1436 filter_id: lzma_vli
1437 encoded_props: Py_buffer
1438 /
1439
1440Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1441
1442The result does not include the filter ID itself, only the options.
1443[clinic start generated code]*/
1444
1445static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001446_lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
Larry Hastings89964c42015-04-14 18:07:59 -04001447 Py_buffer *encoded_props)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001448/*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
Larry Hastingsf256c222014-01-25 21:30:37 -08001449{
1450 lzma_filter filter;
1451 lzma_ret lzret;
1452 PyObject *result = NULL;
1453 filter.id = filter_id;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001454 _lzma_state *state = get_lzma_state(module);
1455 assert(state != NULL);
Larry Hastingsf256c222014-01-25 21:30:37 -08001456
1457 lzret = lzma_properties_decode(
1458 &filter, NULL, encoded_props->buf, encoded_props->len);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001459 if (catch_lzma_error(state, lzret)) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001460 return NULL;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001461 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001462
1463 result = build_filter_spec(&filter);
1464
1465 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1466 allocated by lzma_properties_decode() using the default allocator. */
1467 free(filter.options);
1468 return result;
1469}
1470
Larry Hastingsf256c222014-01-25 21:30:37 -08001471/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1472 would not work correctly on platforms with 32-bit longs. */
1473static int
Benjamin Petersonaf580df2016-09-06 10:46:49 -07001474module_add_int_constant(PyObject *m, const char *name, long long value)
Larry Hastingsf256c222014-01-25 21:30:37 -08001475{
1476 PyObject *o = PyLong_FromLongLong(value);
Dong-hee Na1937edd2020-06-23 00:53:07 +09001477 if (o == NULL) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001478 return -1;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001479 }
1480 if (PyModule_AddObject(m, name, o) == 0) {
Larry Hastingsf256c222014-01-25 21:30:37 -08001481 return 0;
Dong-hee Na1937edd2020-06-23 00:53:07 +09001482 }
Larry Hastingsf256c222014-01-25 21:30:37 -08001483 Py_DECREF(o);
1484 return -1;
1485}
1486
Dong-hee Na1937edd2020-06-23 00:53:07 +09001487static int
1488lzma_exec(PyObject *module)
1489{
1490#define ADD_INT_PREFIX_MACRO(module, macro) \
1491 do { \
1492 if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) { \
1493 return -1; \
1494 } \
1495 } while(0)
1496
1497#define ADD_INT_MACRO(module, macro) \
1498 do { \
1499 if (PyModule_AddIntMacro(module, macro) < 0) { \
1500 return -1; \
1501 } \
1502 } while (0)
1503
1504
1505 _lzma_state *state = get_lzma_state(module);
1506
1507 state->empty_tuple = PyTuple_New(0);
1508 if (state->empty_tuple == NULL) {
1509 return -1;
1510 }
1511
1512 ADD_INT_MACRO(module, FORMAT_AUTO);
1513 ADD_INT_MACRO(module, FORMAT_XZ);
1514 ADD_INT_MACRO(module, FORMAT_ALONE);
1515 ADD_INT_MACRO(module, FORMAT_RAW);
1516 ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
1517 ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
1518 ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
1519 ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
1520 ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
1521 ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
1522 ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
1523 ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
1524 ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
1525 ADD_INT_PREFIX_MACRO(module, FILTER_X86);
1526 ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
1527 ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
1528 ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
1529 ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
1530 ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
1531 ADD_INT_PREFIX_MACRO(module, MF_HC3);
1532 ADD_INT_PREFIX_MACRO(module, MF_HC4);
1533 ADD_INT_PREFIX_MACRO(module, MF_BT2);
1534 ADD_INT_PREFIX_MACRO(module, MF_BT3);
1535 ADD_INT_PREFIX_MACRO(module, MF_BT4);
1536 ADD_INT_PREFIX_MACRO(module, MODE_FAST);
1537 ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
1538 ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
1539 ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
1540
1541 state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1542 if (state->error == NULL) {
1543 return -1;
1544 }
1545
1546 if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
1547 return -1;
1548 }
1549
1550
1551 state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1552 &lzma_compressor_type_spec, NULL);
1553 if (state->lzma_compressor_type == NULL) {
1554 return -1;
1555 }
1556
1557 if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
1558 return -1;
1559 }
1560
1561 state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1562 &lzma_decompressor_type_spec, NULL);
1563 if (state->lzma_decompressor_type == NULL) {
1564 return -1;
1565 }
1566
1567 if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
1568 return -1;
1569 }
1570
1571 return 0;
1572}
1573
1574static PyMethodDef lzma_methods[] = {
1575 _LZMA_IS_CHECK_SUPPORTED_METHODDEF
1576 _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
1577 _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1578 {NULL}
1579};
1580
1581static PyModuleDef_Slot lzma_slots[] = {
1582 {Py_mod_exec, lzma_exec},
1583 {0, NULL}
1584};
1585
1586static int
1587lzma_traverse(PyObject *module, visitproc visit, void *arg)
1588{
1589 _lzma_state *state = get_lzma_state(module);
1590 Py_VISIT(state->lzma_compressor_type);
1591 Py_VISIT(state->lzma_decompressor_type);
1592 Py_VISIT(state->error);
1593 Py_VISIT(state->empty_tuple);
1594 return 0;
1595}
1596
1597static int
1598lzma_clear(PyObject *module)
1599{
1600 _lzma_state *state = get_lzma_state(module);
1601 Py_CLEAR(state->lzma_compressor_type);
1602 Py_CLEAR(state->lzma_decompressor_type);
1603 Py_CLEAR(state->error);
1604 Py_CLEAR(state->empty_tuple);
1605 return 0;
1606}
1607
1608static void
1609lzma_free(void *module)
1610{
1611 lzma_clear((PyObject *)module);
1612}
1613
1614static PyModuleDef _lzmamodule = {
1615 PyModuleDef_HEAD_INIT,
1616 .m_name = "_lzma",
1617 .m_size = sizeof(_lzma_state),
1618 .m_methods = lzma_methods,
1619 .m_slots = lzma_slots,
1620 .m_traverse = lzma_traverse,
1621 .m_clear = lzma_clear,
1622 .m_free = lzma_free,
1623};
Larry Hastingsf256c222014-01-25 21:30:37 -08001624
1625PyMODINIT_FUNC
1626PyInit__lzma(void)
1627{
Dong-hee Na1937edd2020-06-23 00:53:07 +09001628 return PyModuleDef_Init(&_lzmamodule);
Larry Hastingsf256c222014-01-25 21:30:37 -08001629}