Blame - Lib/gzip.py - platform/external/python/cpython3

blob: b9f80bbd9ef522cc96c8b36e9f6461143b383912 [file] [log] [blame]

Guido van Rossum	1526219	1997-04-30 16:04:57 +0000	[diff] [blame]	1	import time
				2	import string
				3	import zlib
				4	import StringIO
				5
				6	# Implements a Python class that reads and writes gzipped files.
				7	# The user of the file doesn't have to worry about the compression,
				8	# but random access is not allowed.
				9
				10	# based on Andrew Kuchling's minigzip.py distributed with the zlib module
				11
				12	FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
				13
				14	READ, WRITE = 1, 2
				15
				16	def write32(output, value):
				17	t = divmod(value, 256)
				18	b1 = chr(t[1])
				19
				20	t = divmod(t[0], 256)
				21	b2 = chr(t[1])
				22
				23	t = divmod(t[0], 256)
				24	b3 = chr(t[1])
				25
				26	t = divmod(t[0], 256)
				27	b4 = chr(t[1])
				28
				29	buf = b1 + b2 + b3 + b4
				30	output.write(buf)
				31
				32
				33	def read32(input):
				34	buf = input.read(4)
				35	v = ord(buf[0])
				36	v = v + (ord(buf[1]) << 8)
				37	v = v + (ord(buf[2]) << 16)
				38	v = v + (ord(buf[3]) << 24)
				39	return v
				40
				41	written = []
				42
				43	_py_open = open
				44
				45	def open(filename, mode, compresslevel=9):
				46	return GzipFile(filename, mode, compresslevel)
				47
				48	class GzipFile:
				49
				50	def __init__(self, filename, mode='r', compresslevel=9):
				51	if mode == 'r' or mode == 'rb':
				52	self.mode = READ
				53	self._init_read()
				54	self.filename = filename
				55	self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
				56
				57	elif mode == 'w' or mode == 'wb':
				58	self.mode = WRITE
				59	self._init_write(filename)
				60	self.compress = zlib.compressobj(compresslevel,
				61	zlib.DEFLATED,
				62	-zlib.MAX_WBITS,
				63	zlib.DEF_MEM_LEVEL,
				64	0)
				65	else:
				66	raise ValueError, "Mode " + mode + " not supported"
				67
				68	self.fileobj = _py_open(self.filename,mode)
				69
				70	if self.mode == WRITE:
				71	self._write_gzip_header()
				72	elif self.mode == READ:
				73	self._read_gzip_header()
				74
				75
				76	def __repr__(self):
				77	s = repr(self.fileobj)
				78	return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
				79
				80	def _init_write(self, filename):
				81	if filename[-3:] != '.gz':
				82	filename = filename + '.gz'
				83	self.filename = filename
				84	self.crc = zlib.crc32("")
				85	self.size = 0
				86	self.writebuf = []
				87	self.bufsize = 0
				88
				89	def _write_gzip_header(self):
				90	self.fileobj.write('\037\213') # magic header
				91	self.fileobj.write('\010') # compression method
				92	self.fileobj.write(chr(FNAME))
				93	write32(self.fileobj, int(time.time()))
				94	self.fileobj.write('\002')
				95	self.fileobj.write('\377')
				96	self.fileobj.write(self.filename[:-3] + '\000')
				97
				98	def _init_read(self):
				99	self.crc = zlib.crc32("")
				100	self.size = 0
				101	self.extrabuf = ""
				102	self.extrasize = 0
				103
				104	def _read_gzip_header(self):
				105	magic = self.fileobj.read(2)
				106	if magic != '\037\213':
				107	raise RuntimeError, 'Not a gzipped file'
				108	method = ord( self.fileobj.read(1) )
				109	if method != 8:
				110	raise RuntimeError, 'Unknown compression method'
				111	flag = ord( self.fileobj.read(1) )
				112	# modtime = self.fileobj.read(4)
				113	# extraflag = self.fileobj.read(1)
				114	# os = self.fileobj.read(1)
				115	self.fileobj.read(6)
				116
				117	if flag & FEXTRA:
				118	# Read & discard the extra field, if present
				119	xlen=ord(self.fileobj.read(1))
				120	xlen=xlen+256*ord(self.fileobj.read(1))
				121	self.fileobj.read(xlen)
				122	if flag & FNAME:
				123	# Read and discard a null-terminated string containing the filename
				124	while (1):
				125	s=self.fileobj.read(1)
				126	if s=='\000': break
				127	if flag & FCOMMENT:
				128	# Read and discard a null-terminated string containing a comment
				129	while (1):
				130	s=self.fileobj.read(1)
				131	if s=='\000': break
				132	if flag & FHCRC:
				133	self.fileobj.read(2) # Read & discard the 16-bit header CRC
				134
				135
				136	def write(self,data):
				137	if len(data) > 0:
				138	self.size = self.size + len(data)
				139	self.crc = zlib.crc32(data, self.crc)
				140	self.fileobj.write( self.compress.compress(data) )
				141
				142	def writelines(self,lines):
				143	self.write(string.join(lines))
				144
				145	def read(self,size=None):
				146	if self.extrasize <= 0 and self.fileobj.closed:
				147	return ''
				148
				149	if not size:
				150	# get the whole thing
				151	try:
				152	while 1:
				153	self._read()
				154	except EOFError:
				155	size = self.extrasize
				156	else:
				157	# just get some more of it
				158	try:
				159	while size > self.extrasize:
				160	self._read()
				161	except EOFError:
				162	pass
				163
				164	chunk = self.extrabuf[:size]
				165	self.extrabuf = self.extrabuf[size:]
				166	self.extrasize = self.extrasize - size
				167
				168	return chunk
				169
				170	def _read(self):
				171	buf = self.fileobj.read(1024)
				172	if buf == "":
				173	uncompress = self.decompress.flush()
				174	if uncompress == "":
				175	self._read_eof()
				176	self.fileobj.close()
				177	raise EOFError, 'Reached EOF'
				178	else:
				179	uncompress = self.decompress.decompress(buf)
				180	self.crc = zlib.crc32(uncompress, self.crc)
				181	self.extrabuf = self.extrabuf + uncompress
				182	self.extrasize = self.extrasize + len(uncompress)
				183	self.size = self.size + len(uncompress)
				184
				185	def _read_eof(self):
				186	# Andrew writes:
				187	## We've read to the end of the file, so we have to rewind in order
				188	## to reread the 8 bytes containing the CRC and the file size. The
				189	## decompressor is smart and knows when to stop, so feeding it
				190	## extra data is harmless.
				191	self.fileobj.seek(-8, 2)
				192	crc32 = read32(self.fileobj)
				193	isize = read32(self.fileobj)
				194	if crc32 != self.crc:
				195	self.error = "CRC check failed"
				196	elif isize != self.size:
				197	self.error = "Incorrect length of data produced"
				198
				199	def close(self):
				200	if self.mode == WRITE:
				201	self.fileobj.write(self.compress.flush())
				202	write32(self.fileobj, self.crc)
				203	write32(self.fileobj, self.size)
				204	self.fileobj.close()
				205	elif self.mode == READ:
				206	self.fileobj.close()
				207
				208	def flush(self):
				209	self.fileobj.flush()
				210
				211	def seek(self):
				212	raise IOError, 'Random access not allowed in gzip files'
				213
				214	def tell(self):
				215	raise IOError, 'I won\'t tell() you for gzip files'
				216
				217	def isatty(self):
				218	return 0
				219
				220	def readline(self):
				221	# should I bother with this
				222	raise RuntimeError, "not implemented"
				223
				224	def readlines(self):
				225	# should I bother with this
				226	raise RuntimeError, "not implemented"
				227
				228
				229	class StringIOgz(GzipFile):
				230
				231	"""A StringIO substitute that reads/writes gzipped buffers."""
				232
				233	def __init__(self, buf=None, filename="StringIOgz"):
				234	"""Read/write mode depends on first argument.
				235
				236	If __init__ is passed a buffer, it will treat that as the
				237	gzipped data and set up the StringIO for reading. Without the
				238	initial argument, it will assume a new file for writing.
				239
				240	The filename argument is written in the header of buffers
				241	opened for writing. Not sure that this is useful, but the
				242	GzipFile code expects some filename."""
				243
				244	if buf:
				245	self.mode = READ
				246	self._init_read()
				247	self.filename = filename
				248	self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
				249	self.fileobj = StringIO.StringIO(buf)
				250	else:
				251	self.mode = WRITE
				252	self._init_write(filename)
				253	self.compress = zlib.compressobj(compresslevel,
				254	zlib.DEFLATED,
				255	-zlib.MAX_WBITS,
				256	zlib.DEF_MEM_LEVEL,
				257	0)
				258	self.fileobj = StringIO.StringIO()
				259
				260	if self.mode == WRITE:
				261	self._write_gzip_header()
				262	elif self.mode == READ:
				263	self._read_gzip_header()
				264