# blob: f9a59d7ff0aef800021803ef472420324b319f9e [file] [log] [blame]
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

import builtins
import os
import struct
import sys
import time
import zlib

# Public API of this module.
__all__ = ["GzipFile", "open"]

# Bit masks for the FLG byte of a gzip member header (RFC 1952):
#   FTEXT    - file is probably ASCII text
#   FHCRC    - a 16-bit header CRC follows the header
#   FEXTRA   - an "extra" field is present
#   FNAME    - a NUL-terminated original file name is present
#   FCOMMENT - a NUL-terminated comment is present
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ, WRITE = 1, 2

def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative i is interpreted as a two's-complement signed 32-bit value
    and mapped to the corresponding non-negative value (i + 2**32).
    Non-negative values are returned unchanged.
    """
    if i < 0:
        i += 1 << 32
    return i

def LOWU32(i):
    """Return the low-order 32 bits, as a non-negative int"""
    return i & 0xFFFFFFFF

def write32u(output, value):
    """Write the low-order 32 bits of value to output, little-endian.

    output is any object with a write() method accepting bytes; value is
    an integer.  The value is masked to 32 bits first, so the same bit
    pattern is written whether the caller holds it as a signed or an
    unsigned quantity (struct's "<L" format rejects out-of-range values).
    """
    output.write(struct.pack("<L", value & 0xFFFFFFFF))

def read32(input):
    """Read four bytes from input and return them as an unsigned int.

    The bytes are interpreted as a little-endian 32-bit value.  If fewer
    than four bytes are available, struct.unpack() raises struct.error.
    """
    return struct.unpack("<I", input.read(4))[0]

def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.

    """
    return GzipFile(filename, mode, compresslevel)

class GzipFile:
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened by this instance itself (as opposed to one passed
    # in by the caller); only this one is closed by close().
    myfileobj = None
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the stream when compressing.  All gzip compressed streams
        are required to contain a timestamp.  If omitted or None, the
        current time is used.  This module ignores the timestamp when
        decompressing; however, some programs, such as gunzip, make use
        of it.  The format of the timestamp is the same as that of the
        return value of time.time() and of the st_mtime member of the
        object returned by os.stat().

        Raises IOError if the mode does not start with 'r', 'w' or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            filename = getattr(fileobj, 'name', '')
            # Some file objects expose a non-string name (for instance an
            # integer file descriptor); such a name cannot go in the header.
            if not isinstance(filename, (str, bytes)):
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            self.extrabuf = b""
            self.extrasize = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate stream, the gzip header and
            # trailer are written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            # Do not leak a file that was opened just above.
            if self.myfileobj is not None:
                self.myfileobj.close()
                self.myfileobj = None
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        self.offset = 0
        self.mtime = mtime

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias for name (with '.gz' appended in write mode)."""
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        suffix = b".gz" if isinstance(self.name, bytes) else ".gz"
        if self.mode == WRITE and self.name[-3:] != suffix:
            return self.name + suffix
        return self.name

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the per-stream state used while compressing."""
        self.name = filename
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write a gzip member header (RFC 1952) to the underlying file."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(bytes([flags]))
        mtime = self.mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        self.fileobj.write(b'\002')                 # extra flags (XFL)
        self.fileobj.write(b'\377')                 # OS byte (255 = unknown)
        if fname:
            self.fileobj.write(fname + b'\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Read and validate a gzip member header from the underlying file.

        Stores the header timestamp in self.mtime and skips the optional
        extra, filename, comment and header-CRC fields.  Raises IOError
        if the magic number or the compression method is wrong.
        """
        magic = self.fileobj.read(2)
        if magic != b'\037\213':
            raise IOError('Not a gzipped file')
        method = ord( self.fileobj.read(1) )
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord( self.fileobj.read(1) )
        self.mtime = read32(self.fileobj)
        # extraflag = self.fileobj.read(1)
        # os = self.fileobj.read(1)
        self.fileobj.read(2)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s==b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s==b'\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC


    def write(self,data):
        """Compress data and write it to the underlying file.

        Raises IOError(EBADF) on a read-only object and ValueError on a
        closed one.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.fileobj.write( self.compress.compress(data) )
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size uncompressed bytes; all remaining data if size < 0.

        Raises IOError(EBADF) on a write-only object.  Returns b'' once
        the object is closed and its buffer is empty.
        """
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return b''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back to the front of the buffer of decompressed data."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read and decompress up to size compressed bytes into the buffer.

        Raises EOFError when no more data is available.  Uses tell() and
        seek() on the underlying file object.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek( pos ) # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == b"":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data( uncompress )
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data( uncompress )

        if self.decompress.unused_data != b"":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer; update CRC and size."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and size stored in the 8-byte member trailer.

        Raises IOError if either does not match the values computed while
        decompressing.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check the that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)  # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file.

        In write mode the remaining compressed data and the trailer (CRC
        and size) are written first.  A file object supplied by the caller
        is left open; a file opened by this object is always closed, even
        if writing the trailer fails.  Closing twice is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Mark the object closed up front so a failure below cannot leave
        # it half-closed.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def __del__(self):
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # __init__ did not get far enough to set the attributes.
            return
        self.close()

    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data and the underlying file.

        Raises ValueError if the object has been closed.
        """
        if self.fileobj is None:
            raise ValueError("flush() on closed GzipFile object")
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = b""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream and return it.

        whence may be 0 (absolute) or 1 (relative); seeking from the end
        raises ValueError.  In write mode only forward seeks are allowed
        and the gap is filled with zero bytes.  In read mode a backward
        seek rewinds and re-reads from the start.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            chunk = bytes(1024)
            for i in range(count // 1024):
                self.write(chunk)
            self.write(bytes(count % 1024))
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)
        return self.offset

    def readline(self, size=-1):
        """Return one line, including the newline, of at most size bytes.

        A negative size means no limit.  Returns b'' at end of data.
        """
        if size < 0:
            size = sys.maxsize
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find(b'\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == b'':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return b''.join(bufs) # Return resulting line

    def readlines(self, sizehint=0):
        """Return a list of lines, stopping once sizehint bytes were read.

        A sizehint of zero or less reads all remaining lines.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxsize
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == b"":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every item of the iterable L with write()."""
        for line in L:
            self.write(line)

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration

    def __enter__(self):
        if self.fileobj is None:
            raise ValueError("I/O operation on closed GzipFile object")
        return self

    def __exit__(self, *args):
        self.close()


def _test():
    """Command-line driver: compress, or with -d decompress, the arguments.

    Each argument is a file name; "-" (also the default when no file is
    given) means standard input to standard output.
    """
    # Act like gzip; with -d, act like gunzip.
    # The input file is not deleted, however, nor are any other gzip
    # options or features supported.
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer)
                g = sys.stdout.buffer
            else:
                if arg[-3:] != ".gz":
                    print("filename doesn't end in .gz:", repr(arg))
                    continue
                f = open(arg, "rb")
                g = builtins.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin.buffer
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer)
            else:
                f = builtins.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        try:
            while True:
                chunk = f.read(1024)
                if not chunk:
                    break
                g.write(chunk)
        finally:
            # The standard streams are used through their binary .buffer
            # objects, so those are what must be left open for the next
            # argument.
            if g is not sys.stdout.buffer:
                g.close()
            if f is not sys.stdin.buffer:
                f.close()

if __name__ == '__main__':
    _test()