Blame - Lib/lzma.py - platform/external/python/cpython2

blob: 07906910c5641b11621958859306add92ce5fd23 [file] [log] [blame]

Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	1	"""Interface to the liblzma compression library.
				2
				3	This module provides a class for reading and writing compressed files,
				4	classes for incremental (de)compression, and convenience functions for
				5	one-shot (de)compression.
				6
				7	These classes and functions support both the XZ and legacy LZMA
				8	container formats, as well as raw compressed data streams.
				9	"""
				10
# Names exported by "from lzma import *".
# Only LZMAFile, compress and decompress are defined in this file; the
# remaining names are expected to be supplied by the "from _lzma import *"
# statement that follows this list.
__all__ = [
    "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
    "CHECK_ID_MAX", "CHECK_UNKNOWN",
    "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
    "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
    "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
    "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
    "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",

    "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
    "compress", "decompress", "is_check_supported",
    "encode_filter_properties", "decode_filter_properties",
]
				24
				25	import io
				26	from _lzma import *
				27
				28
# Values stored in LZMAFile._mode to track the state of the file object.
_MODE_CLOSED = 0      # closed (also the state before __init__ completes)
_MODE_READ = 1        # open for reading
_MODE_READ_EOF = 2    # open for reading, end of the data has been reached
_MODE_WRITE = 3       # open for writing or appending

# Number of compressed bytes requested from the underlying file each time
# LZMAFile refills its readahead buffer.
_BUFFER_SIZE = 8192
				35
				36
				37	class LZMAFile(io.BufferedIOBase):
				38
				39	"""A file object providing transparent LZMA (de)compression.
				40
				41	An LZMAFile can act as a wrapper for an existing file object, or
				42	refer directly to a named file on disk.
				43
				44	Note that LZMAFile provides a binary file interface - data read
				45	is returned as bytes, and data to be written must be given as bytes.
				46	"""
				47
				48	def __init__(self, filename=None, mode="r", *,
Nadeem Vawda	33c34da	2012-06-04 23:34:07 +0200	[diff] [blame]	49	format=None, check=-1, preset=None, filters=None):
				50	"""Open an LZMA-compressed file in binary mode.
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	51
Nadeem Vawda	33c34da	2012-06-04 23:34:07 +0200	[diff] [blame]	52	filename can be either an actual file name (given as a str or
				53	bytes object), in which case the named file is opened, or it can
				54	be an existing file object to read from or write to.
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	55
				56	mode can be "r" for reading (default), "w" for (over)writing, or
Nadeem Vawda	6cbb20c	2012-06-04 23:36:24 +0200	[diff] [blame^]	57	"a" for appending. These can equivalently be given as "rb", "wb",
				58	and "ab" respectively.
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	59
				60	format specifies the container format to use for the file.
				61	If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
				62	default is FORMAT_XZ.
				63
				64	check specifies the integrity check to use. This argument can
				65	only be used when opening a file for writing. For FORMAT_XZ,
				66	the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
				67	support integrity checks - for these formats, check must be
				68	omitted, or be CHECK_NONE.
				69
				70	When opening a file for reading, the preset argument is not
				71	meaningful, and should be omitted. The filters argument should
				72	also be omitted, except when format is FORMAT_RAW (in which case
				73	it is required).
				74
				75	When opening a file for writing, the settings used by the
				76	compressor can be specified either as a preset compression
				77	level (with the preset argument), or in detail as a custom
				78	filter chain (with the filters argument). For FORMAT_XZ and
				79	FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
				80	level. For FORMAT_RAW, the caller must always specify a filter
				81	chain; the raw compressor does not support preset compression
				82	levels.
				83
				84	preset (if provided) should be an integer in the range 0-9,
				85	optionally OR-ed with the constant PRESET_EXTREME.
				86
				87	filters (if provided) should be a sequence of dicts. Each dict
				88	should have an entry for "id" indicating ID of the filter, plus
				89	additional entries for options to the filter.
				90	"""
				91	self._fp = None
				92	self._closefp = False
				93	self._mode = _MODE_CLOSED
				94	self._pos = 0
				95	self._size = -1
				96
Nadeem Vawda	6cbb20c	2012-06-04 23:36:24 +0200	[diff] [blame^]	97	if mode in ("r", "rb"):
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	98	if check != -1:
				99	raise ValueError("Cannot specify an integrity check "
				100	"when opening a file for reading")
				101	if preset is not None:
				102	raise ValueError("Cannot specify a preset compression "
				103	"level when opening a file for reading")
				104	if format is None:
				105	format = FORMAT_AUTO
				106	mode_code = _MODE_READ
				107	# Save the args to pass to the LZMADecompressor initializer.
				108	# If the file contains multiple compressed streams, each
				109	# stream will need a separate decompressor object.
				110	self._init_args = {"format":format, "filters":filters}
				111	self._decompressor = LZMADecompressor(**self._init_args)
				112	self._buffer = None
Nadeem Vawda	6cbb20c	2012-06-04 23:36:24 +0200	[diff] [blame^]	113	elif mode in ("w", "wb", "a", "ab"):
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	114	if format is None:
				115	format = FORMAT_XZ
				116	mode_code = _MODE_WRITE
				117	self._compressor = LZMACompressor(format=format, check=check,
				118	preset=preset, filters=filters)
				119	else:
				120	raise ValueError("Invalid mode: {!r}".format(mode))
				121
Nadeem Vawda	33c34da	2012-06-04 23:34:07 +0200	[diff] [blame]	122	if isinstance(filename, (str, bytes)):
Nadeem Vawda	6cbb20c	2012-06-04 23:36:24 +0200	[diff] [blame^]	123	if "b" not in mode:
				124	mode += "b"
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	125	self._fp = open(filename, mode)
				126	self._closefp = True
				127	self._mode = mode_code
Nadeem Vawda	33c34da	2012-06-04 23:34:07 +0200	[diff] [blame]	128	elif hasattr(filename, "read") or hasattr(filename, "write"):
				129	self._fp = filename
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	130	self._mode = mode_code
				131	else:
Nadeem Vawda	33c34da	2012-06-04 23:34:07 +0200	[diff] [blame]	132	raise TypeError("filename must be a str or bytes object, or a file")
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	133
				134	def close(self):
				135	"""Flush and close the file.
				136
				137	May be called more than once without error. Once the file is
				138	closed, any other operation on it will raise a ValueError.
				139	"""
				140	if self._mode == _MODE_CLOSED:
				141	return
				142	try:
				143	if self._mode in (_MODE_READ, _MODE_READ_EOF):
				144	self._decompressor = None
				145	self._buffer = None
				146	elif self._mode == _MODE_WRITE:
				147	self._fp.write(self._compressor.flush())
				148	self._compressor = None
				149	finally:
				150	try:
				151	if self._closefp:
				152	self._fp.close()
				153	finally:
				154	self._fp = None
				155	self._closefp = False
				156	self._mode = _MODE_CLOSED
				157
				158	@property
				159	def closed(self):
				160	"""True if this file is closed."""
				161	return self._mode == _MODE_CLOSED
				162
				163	def fileno(self):
				164	"""Return the file descriptor for the underlying file."""
				165	self._check_not_closed()
				166	return self._fp.fileno()
				167
				168	def seekable(self):
				169	"""Return whether the file supports seeking."""
Nadeem Vawda	ae557d7	2012-02-12 01:51:38 +0200	[diff] [blame]	170	return self.readable() and self._fp.seekable()
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	171
				172	def readable(self):
				173	"""Return whether the file was opened for reading."""
				174	self._check_not_closed()
				175	return self._mode in (_MODE_READ, _MODE_READ_EOF)
				176
				177	def writable(self):
				178	"""Return whether the file was opened for writing."""
				179	self._check_not_closed()
				180	return self._mode == _MODE_WRITE
				181
				182	# Mode-checking helper functions.
				183
				184	def _check_not_closed(self):
				185	if self.closed:
				186	raise ValueError("I/O operation on closed file")
				187
				188	def _check_can_read(self):
				189	if not self.readable():
				190	raise io.UnsupportedOperation("File not open for reading")
				191
				192	def _check_can_write(self):
				193	if not self.writable():
				194	raise io.UnsupportedOperation("File not open for writing")
				195
				196	def _check_can_seek(self):
Nadeem Vawda	ae557d7	2012-02-12 01:51:38 +0200	[diff] [blame]	197	if not self.readable():
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	198	raise io.UnsupportedOperation("Seeking is only supported "
				199	"on files open for reading")
Nadeem Vawda	ae557d7	2012-02-12 01:51:38 +0200	[diff] [blame]	200	if not self._fp.seekable():
				201	raise io.UnsupportedOperation("The underlying file object "
				202	"does not support seeking")
Nadeem Vawda	3ff069e	2011-11-30 00:25:06 +0200	[diff] [blame]	203
				204	# Fill the readahead buffer if it is empty. Returns False on EOF.
				205	def _fill_buffer(self):
				206	if self._buffer:
				207	return True
				208
				209	if self._decompressor.unused_data:
				210	rawblock = self._decompressor.unused_data
				211	else:
				212	rawblock = self._fp.read(_BUFFER_SIZE)
				213
				214	if not rawblock:
				215	if self._decompressor.eof:
				216	self._mode = _MODE_READ_EOF
				217	self._size = self._pos
				218	return False
				219	else:
				220	raise EOFError("Compressed file ended before the "
				221	"end-of-stream marker was reached")
				222
				223	# Continue to next stream.
				224	if self._decompressor.eof:
				225	self._decompressor = LZMADecompressor(**self._init_args)
				226
				227	self._buffer = self._decompressor.decompress(rawblock)
				228	return True
				229
				230	# Read data until EOF.
				231	# If return_data is false, consume the data without returning it.
				232	def _read_all(self, return_data=True):
				233	blocks = []
				234	while self._fill_buffer():
				235	if return_data:
				236	blocks.append(self._buffer)
				237	self._pos += len(self._buffer)
				238	self._buffer = None
				239	if return_data:
				240	return b"".join(blocks)
				241
				242	# Read a block of up to n bytes.
				243	# If return_data is false, consume the data without returning it.
				244	def _read_block(self, n, return_data=True):
				245	blocks = []
				246	while n > 0 and self._fill_buffer():
				247	if n < len(self._buffer):
				248	data = self._buffer[:n]
				249	self._buffer = self._buffer[n:]
				250	else:
				251	data = self._buffer
				252	self._buffer = None
				253	if return_data:
				254	blocks.append(data)
				255	self._pos += len(data)
				256	n -= len(data)
				257	if return_data:
				258	return b"".join(blocks)
				259
				260	def peek(self, size=-1):
				261	"""Return buffered data without advancing the file position.
				262
				263	Always returns at least one byte of data, unless at EOF.
				264	The exact number of bytes returned is unspecified.
				265	"""
				266	self._check_can_read()
				267	if self._mode == _MODE_READ_EOF or not self._fill_buffer():
				268	return b""
				269	return self._buffer
				270
				271	def read(self, size=-1):
				272	"""Read up to size uncompressed bytes from the file.
				273
				274	If size is negative or omitted, read until EOF is reached.
				275	Returns b"" if the file is already at EOF.
				276	"""
				277	self._check_can_read()
				278	if self._mode == _MODE_READ_EOF or size == 0:
				279	return b""
				280	elif size < 0:
				281	return self._read_all()
				282	else:
				283	return self._read_block(size)
				284
				285	def read1(self, size=-1):
				286	"""Read up to size uncompressed bytes with at most one read
				287	from the underlying stream.
				288
				289	Returns b"" if the file is at EOF.
				290	"""
				291	self._check_can_read()
				292	if (size == 0 or self._mode == _MODE_READ_EOF or
				293	not self._fill_buffer()):
				294	return b""
				295	if 0 < size < len(self._buffer):
				296	data = self._buffer[:size]
				297	self._buffer = self._buffer[size:]
				298	else:
				299	data = self._buffer
				300	self._buffer = None
				301	self._pos += len(data)
				302	return data
				303
				304	def write(self, data):
				305	"""Write a bytes object to the file.
				306
				307	Returns the number of uncompressed bytes written, which is
				308	always len(data). Note that due to buffering, the file on disk
				309	may not reflect the data written until close() is called.
				310	"""
				311	self._check_can_write()
				312	compressed = self._compressor.compress(data)
				313	self._fp.write(compressed)
				314	self._pos += len(data)
				315	return len(data)
				316
				317	# Rewind the file to the beginning of the data stream.
				318	def _rewind(self):
				319	self._fp.seek(0, 0)
				320	self._mode = _MODE_READ
				321	self._pos = 0
				322	self._decompressor = LZMADecompressor(**self._init_args)
				323	self._buffer = None
				324
				325	def seek(self, offset, whence=0):
				326	"""Change the file position.
				327
				328	The new position is specified by offset, relative to the
				329	position indicated by whence. Possible values for whence are:
				330
				331	0: start of stream (default): offset must not be negative
				332	1: current stream position
				333	2: end of stream; offset must not be positive
				334
				335	Returns the new file position.
				336
				337	Note that seeking is emulated, sp depending on the parameters,
				338	this operation may be extremely slow.
				339	"""
				340	self._check_can_seek()
				341
				342	# Recalculate offset as an absolute file position.
				343	if whence == 0:
				344	pass
				345	elif whence == 1:
				346	offset = self._pos + offset
				347	elif whence == 2:
				348	# Seeking relative to EOF - we need to know the file's size.
				349	if self._size < 0:
				350	self._read_all(return_data=False)
				351	offset = self._size + offset
				352	else:
				353	raise ValueError("Invalid value for whence: {}".format(whence))
				354
				355	# Make it so that offset is the number of bytes to skip forward.
				356	if offset < self._pos:
				357	self._rewind()
				358	else:
				359	offset -= self._pos
				360
				361	# Read and discard data until we reach the desired position.
				362	if self._mode != _MODE_READ_EOF:
				363	self._read_block(offset, return_data=False)
				364
				365	return self._pos
				366
				367	def tell(self):
				368	"""Return the current file position."""
				369	self._check_not_closed()
				370	return self._pos
				371
				372
				373	def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
				374	"""Compress a block of data.
				375
				376	Refer to LZMACompressor's docstring for a description of the
				377	optional arguments format, check, preset and filters.
				378
				379	For incremental compression, use an LZMACompressor object instead.
				380	"""
				381	comp = LZMACompressor(format, check, preset, filters)
				382	return comp.compress(data) + comp.flush()
				383
				384
				385	def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
				386	"""Decompress a block of data.
				387
				388	Refer to LZMADecompressor's docstring for a description of the
				389	optional arguments format, check and filters.
				390
				391	For incremental decompression, use a LZMADecompressor object instead.
				392	"""
				393	results = []
				394	while True:
				395	decomp = LZMADecompressor(format, memlimit, filters)
				396	results.append(decomp.decompress(data))
				397	if not decomp.eof:
				398	raise LZMAError("Compressed data ended before the "
				399	"end-of-stream marker was reached")
				400	if not decomp.unused_data:
				401	return b"".join(results)
				402	# There is unused data left over. Proceed to next stream.
				403	data = decomp.unused_data