blob: 45fae9facf869ee62a04e96e410bd88e0780cafe [file] [log] [blame]
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00001"""Functions that read and write gzipped files.
2
Guido van Rossum54f22ed2000-02-04 15:10:34 +00003The user of the file doesn't have to worry about the compression,
4but random access is not allowed."""
5
6# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
Tim Peters49667c22004-07-27 21:05:21 +00008import struct, sys, time
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Georg Brandl1a3284e2007-12-02 09:40:06 +000010import builtins
Guido van Rossum15262191997-04-30 16:04:57 +000011
# Names exported by "from <module> import *".
__all__ = ["GzipFile","open"]

# Bit masks for the flag byte of a gzip member header.  _read_gzip_header()
# tests FEXTRA, FNAME, FCOMMENT and FHCRC to skip the matching optional
# fields, and _write_gzip_header() sets FNAME when a file name is stored.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode to record whether the object was opened
# for reading or for writing.
READ, WRITE = 1, 2
17
def U32(i):
    """Return i reinterpreted as an unsigned 32-bit quantity.

    A negative i is taken to be the signed view of a 32-bit word and is
    shifted up by 2**32; a non-negative i is returned unchanged.
    """
    return i + (1 << 32) if i < 0 else i
25
def LOWU32(i):
    """Return the low-order 32 bits of i as a non-negative int."""
    low_32_bits = (1 << 32) - 1
    return i & low_32_bits
Tim Peters9288f952002-11-05 20:38:55 +000029
def write32u(output, value):
    """Write the low 32 bits of value to output as a little-endian word.

    output is any object with a write() method accepting bytes.  value may
    be negative or wider than 32 bits; only its low-order 32 bits are
    written, so a signed and an unsigned view of the same word produce the
    same four bytes.
    """
    # struct's "<L" format rejects values outside 0 <= value < 2**32, so
    # reduce the value to its 32-bit pattern before packing.
    output.write(struct.pack("<L", value & 0xFFFFFFFF))
34
def read32(input):
    """Read four bytes from input and return them as an unsigned
    little-endian 32-bit integer."""
    raw = input.read(4)
    (value,) = struct.unpack("<I", raw)
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000037
def open(filename, mode="rb", compresslevel=9):
    """Convenience wrapper equivalent to GzipFile(filename, mode, compresslevel).

    filename must be supplied.  mode falls back to 'rb' and compresslevel
    falls back to 9 when they are omitted.

    """
    gzip_file = GzipFile(filename=filename, mode=mode,
                         compresslevel=compresslevel)
    return gzip_file
46
class GzipFile:
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    Data passed to write() is compressed before it reaches the underlying
    file object, and read() returns decompressed bytes.  Seeking is only
    emulated: a backward seek while reading restarts decompression from the
    start of the file, and a forward seek while writing emits zero bytes.

    """

    # File object opened by the constructor itself; close() closes it.  It
    # stays None when the caller supplied fileobj.
    myfileobj = None
    # Largest number of compressed bytes read() asks for in a single step.
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        Raises IOError if the mode starts with anything other than 'r', 'w'
        or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            self.extrabuf = b""
            self.extrasize = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate data without a zlib header; the
            # gzip header and trailer are written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            # Do not leak a file this constructor opened itself.
            if self.myfileobj is not None:
                self.myfileobj.close()
                self.myfileobj = None
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Position in the uncompressed stream, as reported by tell().
        self.offset = 0

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias for the name attribute.

        In write mode a ".gz" suffix is appended when name lacks one.
        """
        import warnings
        # stacklevel=2 attributes the warning to the code reading the
        # attribute rather than to this property.
        warnings.warn("use the name attribute", DeprecationWarning,
                      stacklevel=2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the per-member write state (name, CRC, size, buffers)."""
        self.name = filename
        # zlib.crc32() only accepts bytes-like data, so seed with b"".
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        name = self.name
        if isinstance(name, bytes):
            fname = name
        elif isinstance(name, str):
            try:
                # RFC 1952 requires the FNAME field to be Latin-1. Do not
                # include filenames that cannot be represented that way.
                fname = name.encode('latin-1')
            except UnicodeEncodeError:
                fname = b''
        else:
            # e.g. an integer file descriptor taken from fileobj.name;
            # there is no meaningful file name to store.
            fname = b''
        if fname.endswith(b'.gz'):
            fname = fname[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(bytes([flags]))
        write32u(self.fileobj, int(time.time()))    # modification time
        self.fileobj.write(b'\002')                 # extra flags
        self.fileobj.write(b'\377')                 # operating system byte
        if fname:
            self.fileobj.write(fname + b'\000')

    def _init_read(self):
        """Reset the running CRC and size for the member about to be read."""
        # zlib.crc32() only accepts bytes-like data, so seed with b"".
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0

    def _skip_nul_terminated(self):
        """Consume bytes up to and including the next NUL byte (or EOF)."""
        while True:
            s = self.fileobj.read(1)
            if not s or s == b'\000':
                break

    def _read_gzip_header(self):
        """Read and validate a gzip member header, skipping optional fields.

        Raises IOError if the magic number or compression method is wrong.
        """
        magic = self.fileobj.read(2)
        if magic != b'\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        # Skip modtime (4 bytes), extraflag (1 byte) and os (1 byte).
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            self._skip_nul_terminated()
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            self._skip_nul_terminated()
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress data and write it to the underlying file object.

        Raises IOError(EBADF) if the object was opened for reading and
        ValueError if it has been closed.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.fileobj.write(self.compress.compress(data))
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size bytes of decompressed data.

        A negative size reads everything up to end of file.  Raises
        IOError(EBADF) if the object was opened for writing.
        """
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return b''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back onto the front of the decompressed-data buffer."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read size compressed bytes and buffer their decompressed form.

        Raises EOFError once there is nothing more to read.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == b"":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != b"":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer; update CRC and size."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and size stored in the member trailer.

        Raises IOError when either value disagrees with what was computed
        from the decompressed data.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)   # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file object opened by the constructor
        is closed; one supplied by the caller is left open.  Calling
        close() again is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            # Close the file we own even if writing the trailer failed.
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def __del__(self):
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # __init__ did not get far enough to set the attributes.
            return
        self.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data (write mode) and the file object."""
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        """Always return False."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = b""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream and return it.

        whence may be 0 (absolute) or 1 (relative to the current position);
        any other value raises ValueError.  In write mode only forward
        seeks are possible (IOError otherwise) and the gap is filled with
        zero bytes.  In read mode a backward seek rewinds and re-reads from
        the start.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            chunk = bytes(1024)
            for i in range(count // 1024):
                self.write(chunk)
            self.write(bytes(count % 1024))
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)
        return self.offset

    def readline(self, size=-1):
        """Return one line, including its newline, of at most size bytes.

        A negative size means no limit.  Returns b'' at end of file.
        """
        if size < 0:
            size = sys.maxsize
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find(b'\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == b'':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return b''.join(bufs)  # Return resulting line

    def readlines(self, sizehint=0):
        """Return a list of lines, stopping once about sizehint bytes
        have been collected (no limit when sizehint <= 0)."""
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxsize
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == b"":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every item of the iterable L with write()."""
        for line in L:
            self.write(line)

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration
459
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000460
def _test():
    """Command-line driver: compress, or with -d decompress, the named files.

    Acts like gzip; with -d as the first argument, acts like gunzip.
    The input file is not deleted, however, nor are any other gzip
    options or features supported.  The argument "-" (also the default
    when no file is named) stands for standard input and standard output.
    """
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    # GzipFile exchanges bytes with its file object, so use the binary
    # layer underneath the text-mode standard streams when there is one.
    stdin = getattr(sys.stdin, "buffer", sys.stdin)
    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=stdin)
                g = stdout
            else:
                if arg[-3:] != ".gz":
                    print("filename doesn't end in .gz:", repr(arg))
                    continue
                f = open(arg, "rb")
                g = builtins.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = stdin
                g = GzipFile(filename="", mode="wb", fileobj=stdout)
            else:
                f = builtins.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        while True:
            chunk = f.read(1024)
            if not chunk:
                break
            g.write(chunk)
        # Leave the process-wide standard streams open.
        if g is not stdout:
            g.close()
        if f is not stdin:
            f.close()
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000498
# Run the command-line driver only when this file is executed as a script.
if __name__ == '__main__':
    _test()