blob: 8a2a7184df0ac18bb36520410f45174e8a2e6573 [file] [log] [blame]
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
import builtins
import errno
import struct
import sys
import time
import zlib
Guido van Rossum15262191997-04-30 16:04:57 +000011
# Names exported by "from gzip import *".
__all__ = ["GzipFile","open"]
13
# Bit masks tested against the flag byte of a gzip member header.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ = 1
WRITE = 2
17
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative i is treated as a signed 32-bit value and has 2**32 added
    to it; a non-negative i is returned unchanged.
    """
    return i + (1 << 32) if i < 0 else i
25
def LOWU32(i):
    """Return the low-order 32 bits of i as a non-negative int."""
    low_32_bits = (1 << 32) - 1
    return i & low_32_bits
Tim Peters9288f952002-11-05 20:38:55 +000029
def write32u(output, value):
    """Write the low 32 bits of value to output as 4 little-endian bytes.

    output is any object with a write() method accepting bytes.  The value
    is masked first because struct.pack("<L", ...) rejects integers outside
    0 .. 2**32-1; masking makes a negative (signed) value produce the same
    bit pattern as its unsigned equivalent instead of raising struct.error.
    """
    output.write(struct.pack("<L", value & 0xFFFFFFFF))
34
def read32(input):
    """Read 4 bytes from input and return them as an unsigned
    little-endian integer."""
    raw = input.read(4)
    (value,) = struct.unpack("<I", raw)
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000037
def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.

    """
    gzip_file = GzipFile(filename=filename, mode=mode,
                         compresslevel=compresslevel)
    return gzip_file
46
class GzipFile:
    """File-like object that reads or writes gzip-compressed data.

    The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened by the constructor itself; close() closes it.
    # Stays None when the caller supplied fileobj.
    myfileobj = None
    # Upper bound on the chunk size read() asks _read() for.
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the stream when compressing.  All gzip compressed streams
        are required to contain a timestamp.  If omitted or None, the
        current time is used.  This module ignores the timestamp when
        decompressing; however, some programs, such as gunzip, make use
        of it.  The format of the timestamp is the same as that of the
        return value of time.time() and of the st_mtime member of the
        object returned by os.stat().

        Raises IOError if the mode is neither a read, write nor append mode.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            self.extrabuf = b""
            self.extrasize = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            # Do not leave a file we opened ourselves dangling when the
            # mode is rejected.
            if self.myfileobj is not None:
                self.myfileobj.close()
                self.myfileobj = None
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        self.offset = 0
        self.mtime = mtime
        self.closed = False

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias for the name attribute.

        In write mode, '.gz' is appended when the name does not already
        end with it.  Accessing the property emits a DeprecationWarning.
        """
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        # Reuse the repr of the wrapped file object, minus its first and
        # last characters (normally the enclosing angle brackets).
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _check_closed(self):
        """Raise a ValueError if this GzipFile has been closed."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')

    def _init_write(self, filename):
        """Reset the per-stream bookkeeping used while compressing."""
        self.name = filename
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method

        # RFC 1952 requires the FNAME field to be Latin-1.  Do not
        # include filenames that cannot be represented that way.  The name
        # may also be bytes, or not a string at all (e.g. the integer file
        # descriptor some file objects expose as .name); bytes are used
        # as-is and anything else is left out of the header.
        name = self.name
        if isinstance(name, bytes):
            fname = name
        elif isinstance(name, str):
            try:
                fname = name.encode('latin-1')
            except UnicodeEncodeError:
                fname = b''
        else:
            fname = b''
        if fname.endswith(b'.gz'):
            fname = fname[:-3]

        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(bytes([flags]))
        mtime = self.mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        # Two fixed bytes follow the timestamp; _read_gzip_header() skips
        # the same two positions (extra flags and OS) when reading.
        self.fileobj.write(b'\002')
        self.fileobj.write(b'\377')
        if fname:
            self.fileobj.write(fname + b'\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Consume one gzip member header from the underlying file.

        Stores the header timestamp in self.mtime and skips the optional
        fields.  Raises IOError on a bad magic number or an unknown
        compression method.
        """
        magic = self.fileobj.read(2)
        if magic != b'\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        self.mtime = read32(self.fileobj)
        # Skip the extra-flags byte and the OS byte.
        self.fileobj.read(2)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress data and write it to the underlying file object.

        Raises ValueError if the file is closed and IOError (EBADF) if it
        was opened for reading.
        """
        self._check_closed()
        if self.mode != WRITE:
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.fileobj.write(self.compress.compress(data))
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size bytes of decompressed data.

        A negative size reads everything up to the end of the file.
        Raises ValueError if the file is closed and IOError (EBADF) if it
        was opened for writing.
        """
        self._check_closed()
        if self.mode != READ:
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return b''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back onto the front of the decompressed-data buffer."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read and decompress up to size compressed bytes.

        The decompressed data is appended to the internal buffer.  Raises
        EOFError once no more data is available.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == b"":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != b"":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer and update CRC and size."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and size stored in a member's 8-byte trailer.

        Raises IOError if either value disagrees with what was computed
        while decompressing.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)  # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file object opened by the constructor
        is closed; one supplied by the caller is left open.  Calling
        close() again is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Detach before writing so that a failure below cannot make a later
        # close() (for instance from __del__) write the trailer twice.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            # Always release a file we opened ourselves, even if writing
            # the trailer failed.
            self.closed = True
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def __del__(self):
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # The constructor failed before these attributes were set.
            return
        self.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """In write mode, flush the compressor and the underlying file.

        zlib_mode is passed to the compressor's flush() method.  Raises
        ValueError if the file is closed.
        """
        self._check_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        """Return False; a GzipFile is never reported as a terminal."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        self._check_closed()
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = b""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream and return it.

        whence may be 0 (absolute) or 1 (relative to the current
        position); any other value raises ValueError.  In write mode only
        forward seeks are allowed and the gap is filled with zero bytes.
        In read mode a backward seek rewinds and reads forward again.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            chunk = bytes(1024)
            for _ in range(count // 1024):
                self.write(chunk)
            self.write(bytes(count % 1024))
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for _ in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        return self.offset

    def readline(self, size=-1):
        """Read and return one line of decompressed data.

        At most size bytes are returned when size is non-negative.  An
        empty bytes object is returned at end of file.
        """
        if size < 0:
            size = sys.maxsize
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find(b'\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == b'':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return b''.join(bufs)  # Return resulting line

    def readlines(self, sizehint=0):
        """Return a list of lines read from the file.

        Reading stops at end of file or once the total length of the
        lines read reaches sizehint; a sizehint <= 0 reads all lines.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxsize
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == b"":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every item of the iterable L with write()."""
        for line in L:
            self.write(line)

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration

    def __enter__(self):
        if self.fileobj is None:
            raise ValueError("I/O operation on closed GzipFile object")
        return self

    def __exit__(self, *args):
        self.close()
495
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000496
def _test():
    """Command-line driver: compress, or with -d decompress, the given files.

    Act like gzip; with -d, act like gunzip.
    The input file is not deleted, however, nor are any other gzip
    options or features supported.  The argument "-" (also the default
    when no file is given) means standard input / standard output.
    """
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer)
                g = sys.stdout.buffer
            else:
                if arg[-3:] != ".gz":
                    print("filename doesn't end in .gz:", repr(arg))
                    continue
                f = open(arg, "rb")
                g = builtins.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin.buffer
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer)
            else:
                f = builtins.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        while True:
            chunk = f.read(1024)
            if not chunk:
                break
            g.write(chunk)
        # f and g may be the binary buffers of the standard streams (not
        # sys.stdin / sys.stdout themselves); those must be left open.
        if g is not sys.stdout.buffer:
            g.close()
        if f is not sys.stdin.buffer:
            f.close()
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000534
# Run the command-line driver only when executed as a script.
if __name__ == "__main__":
    _test()