Blame - Lib/lzma.py - platform/external/python/cpython3

2011-11-30 00:25:06 +0200

[diff] [blame]

1

"""Interface to the liblzma compression library.

2

3

This module provides a class for reading and writing compressed files,

4

classes for incremental (de)compression, and convenience functions for

5

one-shot (de)compression.

6

7

These classes and functions support both the XZ and legacy LZMA

8

container formats, as well as raw compressed data streams.

"""

# Public API of the module: constants re-exported from the _lzma
# extension, followed by the classes and helper functions.
__all__ = [
    # Integrity-check identifiers.
    "CHECK_NONE",
    "CHECK_CRC32",
    "CHECK_CRC64",
    "CHECK_SHA256",
    "CHECK_ID_MAX",
    "CHECK_UNKNOWN",
    # Filter identifiers.
    "FILTER_LZMA1",
    "FILTER_LZMA2",
    "FILTER_DELTA",
    "FILTER_X86",
    "FILTER_IA64",
    "FILTER_ARM",
    "FILTER_ARMTHUMB",
    "FILTER_POWERPC",
    "FILTER_SPARC",
    # Container formats.
    "FORMAT_AUTO",
    "FORMAT_XZ",
    "FORMAT_ALONE",
    "FORMAT_RAW",
    # Match finders.
    "MF_HC3",
    "MF_HC4",
    "MF_BT2",
    "MF_BT3",
    "MF_BT4",
    # Compression modes and presets.
    "MODE_FAST",
    "MODE_NORMAL",
    "PRESET_DEFAULT",
    "PRESET_EXTREME",
    # Classes and exceptions.
    "LZMACompressor",
    "LZMADecompressor",
    "LZMAFile",
    "LZMAError",
    # Module-level functions.
    "open",
    "compress",
    "decompress",
    "is_check_supported",
]

23

Nadeem Vawda

2012-06-04 23:38:12 +0200

[diff] [blame]

24

import builtins

Nadeem Vawda

2011-11-30 00:25:06 +0200

[diff] [blame]

25

import io

26

from _lzma import *

Nadeem Vawda

a425c3d

2012-06-21 23:36:48 +0200

[diff] [blame]

27

from _lzma import _encode_filter_properties, _decode_filter_properties

Antoine Pitrou

2015-04-11 00:31:01 +0200

[diff] [blame]

28

import _compression

Nadeem Vawda

2011-11-30 00:25:06 +0200

[diff] [blame]

# Internal state codes stored in LZMAFile._mode.
_MODE_CLOSED = 0
_MODE_READ = 1
# Value 2 no longer used
_MODE_WRITE = 3


class LZMAFile(_compression.BaseStream):

    """A file object providing transparent LZMA (de)compression.

    An LZMAFile can act as a wrapper for an existing file object, or
    refer directly to a named file on disk.

    Note that LZMAFile provides a *binary* file interface - data read
    is returned as bytes, and data to be written must be given as bytes.
    """

    def __init__(self, filename=None, mode="r", *,
                 format=None, check=-1, preset=None, filters=None):
        """Open an LZMA-compressed file in binary mode.

        filename can be either an actual file name (given as a str or
        bytes object), in which case the named file is opened, or it can
        be an existing file object to read from or write to.

        mode can be "r" for reading (default), "w" for (over)writing,
        "x" for creating exclusively, or "a" for appending. These can
        equivalently be given as "rb", "wb", "xb" and "ab" respectively.

        format specifies the container format to use for the file.
        If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
        default is FORMAT_XZ.

        check specifies the integrity check to use. This argument can
        only be used when opening a file for writing. For FORMAT_XZ,
        the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
        support integrity checks - for these formats, check must be
        omitted, or be CHECK_NONE.

        When opening a file for reading, the *preset* argument is not
        meaningful, and should be omitted. The *filters* argument should
        also be omitted, except when format is FORMAT_RAW (in which case
        it is required).

        When opening a file for writing, the settings used by the
        compressor can be specified either as a preset compression
        level (with the *preset* argument), or in detail as a custom
        filter chain (with the *filters* argument). For FORMAT_XZ and
        FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
        level. For FORMAT_RAW, the caller must always specify a filter
        chain; the raw compressor does not support preset compression
        levels.

        preset (if provided) should be an integer in the range 0-9,
        optionally OR-ed with the constant PRESET_EXTREME.

        filters (if provided) should be a sequence of dicts. Each dict
        should have an entry for "id" indicating ID of the filter, plus
        additional entries for options to the filter.

        Raises ValueError for an unrecognised mode or for read-mode
        calls that pass *check* or *preset*, and TypeError when
        filename is neither a str/bytes name nor a file-like object.
        """
        # Start out "closed" so that close() is a safe no-op if the
        # constructor fails part-way through.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if mode in ("r", "rb"):
            if check != -1:
                raise ValueError("Cannot specify an integrity check "
                                 "when opening a file for reading")
            if preset is not None:
                raise ValueError("Cannot specify a preset compression "
                                 "level when opening a file for reading")
            if format is None:
                format = FORMAT_AUTO
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if format is None:
                format = FORMAT_XZ
            mode_code = _MODE_WRITE
            self._compressor = LZMACompressor(format=format, check=check,
                                              preset=preset, filters=filters)
            # Count of uncompressed bytes written so far; reported by tell().
            self._pos = 0
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if isinstance(filename, (str, bytes)):
            # The underlying file is always opened in binary mode.
            if "b" not in mode:
                mode += "b"
            self._fp = builtins.open(filename, mode)
            # We opened it, so we are responsible for closing it.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str or bytes object, or a file")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp, LZMADecompressor,
                trailing_error=LZMAError, format=format, filters=filters)
            self._buffer = io.BufferedReader(raw)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
                self._buffer = None
            elif self._mode == _MODE_WRITE:
                # Emit whatever the compressor is still holding back.
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            # Reset the state even if flushing or closing raised.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        return self._buffer.peek(size)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        self._check_can_read()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b"" if the file is at EOF.
        """
        self._check_can_read()
        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        self._check_can_read()
        return self._buffer.readline(size)

    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the size of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        self._check_can_write()
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # Accept any object supporting the buffer protocol. len() would
            # count items rather than bytes for multi-byte element types
            # (e.g. array('I')), so measure the buffer itself.
            data = memoryview(data)
            length = data.nbytes

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += length
        return length

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        return self._pos

Nadeem Vawda

2012-06-04 23:38:12 +0200

[diff] [blame]

261

def open(filename, mode="rb", *,
         format=None, check=-1, preset=None, filters=None,
         encoding=None, errors=None, newline=None):
    """Open an LZMA-compressed file in binary or text mode.

    filename is either a file name (str or bytes), in which case that
    file is opened, or an already-open file object to read from or
    write to.

    mode is one of "r", "rb" (default), "w", "wb", "x", "xb", "a" or
    "ab" for binary access, or "rt", "wt", "xt" or "at" for text access.

    format, check, preset and filters carry the compression settings
    and have the same meaning as for LZMACompressor, LZMADecompressor
    and LZMAFile.

    In binary mode the call is equivalent to LZMAFile(filename, mode,
    ...), and encoding, errors and newline must be left unset.

    In text mode an LZMAFile is created and wrapped in an
    io.TextIOWrapper configured with the given encoding, error
    handling behavior, and line ending(s).
    """
    text_mode = "t" in mode

    if text_mode:
        # "t" and "b" are mutually exclusive.
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # Text-only options make no sense for a binary stream.
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # LZMAFile itself only understands binary modes.
    underlying = LZMAFile(filename, mode.replace("t", ""),
                          format=format, check=check,
                          preset=preset, filters=filters)

    return (io.TextIOWrapper(underlying, encoding, errors, newline)
            if text_mode else underlying)

Nadeem Vawda

2011-11-30 00:25:06 +0200

[diff] [blame]

308

def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
    """Compress a block of data in one shot.

    The optional arguments *format*, *check*, *preset* and *filters*
    are passed straight to LZMACompressor; see its docstring for their
    meaning.

    For incremental compression, use an LZMACompressor instead.
    """
    encoder = LZMACompressor(format, check, preset, filters)
    payload = encoder.compress(data)
    # flush() finishes the stream and returns whatever is still pending.
    trailer = encoder.flush()
    return payload + trailer

318

319

320

def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):

321

"""Decompress a block of data.

322

323

Refer to LZMADecompressor's docstring for a description of the

324

optional arguments *format*, *check* and *filters*.

325

Nadeem Vawda

2012-10-13 04:26:49 +0200

[diff] [blame]

326

For incremental decompression, use an LZMADecompressor instead.

Nadeem Vawda

2011-11-30 00:25:06 +0200

[diff] [blame]

"""

results = []

while True:

decomp = LZMADecompressor(format, memlimit, filters)

Nadeem Vawda

9c72ebc

2013-12-04 23:03:49 +0100

[diff] [blame]

331

try:

332

res = decomp.decompress(data)

333

except LZMAError:

334

if results:

335

break # Leftover data is not a valid LZMA/XZ stream; ignore it.

336

else:

337

raise # Error on the first iteration; bail out.

338

results.append(res)

Nadeem Vawda

2011-11-30 00:25:06 +0200

[diff] [blame]

339

if not decomp.eof:

340

raise LZMAError("Compressed data ended before the "

341

"end-of-stream marker was reached")

Nadeem Vawda