blob: 11d557172fd0de800d9cd37030cfb33bd71f3293 [file] [log] [blame]
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

7
Tim Peters49667c22004-07-27 21:05:21 +00008import struct, sys, time
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Georg Brandl1a3284e2007-12-02 09:40:06 +000010import builtins
Guido van Rossum15262191997-04-30 16:04:57 +000011
# Names exported by ``from <module> import *``.
__all__ = ["GzipFile", "open"]
13
# Bit masks tested against / written to the FLG byte of a gzip member header.
FTEXT = 1
FHCRC = 2       # a 16-bit header CRC follows the optional fields
FEXTRA = 4      # a length-prefixed "extra" field is present
FNAME = 8       # a NUL-terminated original file name is present
FCOMMENT = 16   # a NUL-terminated comment is present

# Values stored in GzipFile.mode to record how the object was opened.
READ = 1
WRITE = 2
17
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative i is treated as the two's-complement bit pattern of a
    32-bit value and has 2**32 added to it; a non-negative i is returned
    unchanged.
    """
    if i < 0:
        i += 1 << 32
    return i
25
def LOWU32(i):
    """Return the low-order 32 bits of i, as a non-negative int."""
    return i & 0xFFFFFFFF
Tim Peters9288f952002-11-05 20:38:55 +000029
def write32u(output, value):
    """Write the low 32 bits of value to output as 4 little-endian bytes.

    output is any object with a write() method accepting bytes.

    struct.pack() refuses integers outside the range of the "<L" format,
    so the value is reduced modulo 2**32 first.  That keeps the bit
    pattern of a negative (signed) 32-bit value and lets oversized
    values wrap instead of raising struct.error.
    """
    output.write(struct.pack("<L", value & 0xFFFFFFFF))
34
def read32(input):
    """Read 4 bytes from input and return them as an unsigned int.

    The bytes are interpreted as a little-endian 32-bit value.
    struct.error propagates if input.read(4) yields fewer than 4 bytes.
    """
    (value,) = struct.unpack("<I", input.read(4))
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000037
def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.

    """
    return GzipFile(filename, mode, compresslevel)
46
class GzipFile:
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    An instance is either a reader or a writer (see the mode argument of
    the constructor).  It can be used in a ``with`` statement, in which
    case close() is called on exit.

    """

    # File object opened by the constructor itself (as opposed to one
    # passed in by the caller); close() closes it.
    myfileobj = None
    # Upper bound for the size of a single raw read issued by read().
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        IOError is raised if the mode does not start with 'r', 'w' or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            self.extrabuf = b""
            self.extrasize = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            # Do not leak a file this constructor opened itself.
            if self.myfileobj is not None:
                self.myfileobj.close()
                self.myfileobj = None
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        self.offset = 0

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias for the name attribute.

        For a writer whose name does not end in ".gz", the returned
        value has ".gz" appended.
        """
        import warnings
        # stacklevel=2 attributes the warning to the code reading the
        # property rather than to this module.
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def __enter__(self):
        """Return self so the object can be used in a with statement."""
        return self

    def __exit__(self, *exc_info):
        """Close the object when the with block is left."""
        self.close()

    def _init_write(self, filename):
        """Reset the per-member writer state (name, CRC, size, buffers)."""
        self.name = filename
        # zlib.crc32() needs a bytes-like object, not str.
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write a gzip member header to the underlying file object."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        name = self.name
        if isinstance(name, bytes):
            fname = name
        elif isinstance(name, str):
            try:
                # RFC 1952 requires the FNAME field to be Latin-1. Do not
                # include filenames that cannot be represented that way.
                fname = name.encode('latin-1')
            except UnicodeEncodeError:
                fname = b''
        else:
            # e.g. an integer file descriptor picked up from fileobj.name
            fname = b''
        if fname.endswith(b'.gz'):
            fname = fname[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(bytes([flags]))
        write32u(self.fileobj, int(time.time()))    # modification time
        self.fileobj.write(b'\002')
        self.fileobj.write(b'\377')
        if fname:
            self.fileobj.write(fname + b'\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        # zlib.crc32() needs a bytes-like object, not str.
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Consume a gzip member header from the underlying file object.

        Raises IOError if the magic number or compression method is not
        the expected one.  All optional header fields are skipped.
        """
        magic = self.fileobj.read(2)
        if magic != b'\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        # Skip modtime (4 bytes), extra flags (1 byte) and OS (1 byte).
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256 * ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress data and write it to the underlying file object.

        Raises IOError if the object was opened for reading and
        ValueError if it has been closed.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.fileobj.write(self.compress.compress(data))
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size uncompressed bytes; all of them if size < 0.

        Raises IOError if the object was opened for writing.
        """
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return b''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back onto the front of the decompressed-data buffer."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read up to size raw bytes and buffer their decompressed form.

        Raises EOFError when there is nothing more to read.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == b"":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != b"":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data) + 8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer; update CRC and size."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the 8-byte member trailer against the computed values.

        Raises IOError if the stored CRC or length does not match.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)  # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file object.

        A writer flushes the compressor and appends the CRC/size trailer.
        A file opened by the constructor is closed, even if writing the
        trailer fails.  Calling close() again is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Mark the object closed first so a failure below cannot lead to
        # the trailer being written a second time.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def __del__(self):
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # The constructor did not get far enough to set fileobj.
            return
        self.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush the compressor (writers only) and the underlying file."""
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        """Always return False."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = b""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream and return it.

        whence may be 0 (absolute) or 1 (relative to the current
        position); any other value raises ValueError.  A writer can only
        move forward, which is done by writing zero bytes; moving
        backward raises IOError.  A reader moves forward by reading and
        discarding data, and moves backward by rewinding first.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            chunk = bytes(1024)
            for i in range(count // 1024):
                self.write(chunk)
            self.write(bytes(count % 1024))
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)
        return self.offset

    def readline(self, size=-1):
        """Return one line, including its newline if one was found.

        At most size bytes are returned when size is non-negative.
        """
        if size < 0:
            size = sys.maxsize
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find(b'\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == b'':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return b''.join(bufs)  # Return resulting line

    def readlines(self, sizehint=0):
        """Return a list of lines.

        Reading stops at end of data, or once the lines collected total
        at least sizehint bytes when sizehint is positive.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxsize
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == b"":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every item of the iterable L with write()."""
        for line in L:
            self.write(line)

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration
461
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000462
def _test():
    """Command-line driver: compress, or with -d decompress, the arguments.

    Act like gzip; with -d, act like gunzip.
    The input file is not deleted, however, nor are any other gzip
    options or features supported.  The argument "-" (also the default
    when no file is given) means standard input to standard output.
    """
    args = sys.argv[1:]
    decompress = bool(args) and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer)
                g = sys.stdout.buffer
            else:
                if arg[-3:] != ".gz":
                    print("filename doesn't end in .gz:", repr(arg))
                    continue
                f = open(arg, "rb")
                g = builtins.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin.buffer
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer)
            else:
                f = builtins.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        while True:
            chunk = f.read(1024)
            if not chunk:
                break
            g.write(chunk)
        # The standard streams are used through their binary .buffer
        # objects above, so those are what must be left open here.
        if g is not sys.stdout.buffer:
            g.close()
        if f is not sys.stdin.buffer:
            f.close()


if __name__ == '__main__':
    _test()