# blob: 983e0cee07f0fc8be648b9991ef60c31b14d3285
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

import builtins
import errno
import struct
import sys
import time
import zlib

# Public API of this module: the file class and the convenience opener.
__all__ = ["GzipFile", "open"]

# Bit masks for the FLG byte of a gzip member header (RFC 1952).
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Internal values stored in GzipFile.mode.
READ, WRITE = 1, 2

def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative i is treated as the two's-complement encoding of a
    32-bit value and has 2**32 added to it; a non-negative i is
    returned unchanged.
    """
    if i < 0:
        i += 1 << 32
    return i

def LOWU32(i):
    """Return the low-order 32 bits of i, as a non-negative int."""
    return i & 0xFFFFFFFF

def write32u(output, value):
    """Write value to output as a 4-byte little-endian unsigned integer.

    output is any object with a write() method accepting bytes.  value
    must already be in the range 0 .. 2**32 - 1; struct.pack() raises
    struct.error otherwise, so callers mask with 0xffffffff first.
    """
    output.write(struct.pack("<L", value))

def read32(input):
    """Read 4 bytes from input and return them as an unsigned integer.

    The bytes are interpreted in little-endian order.  struct.unpack()
    raises struct.error if input.read(4) returns fewer than 4 bytes.
    """
    return struct.unpack("<I", input.read(4))[0]

def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.

    """
    return GzipFile(filename, mode, compresslevel)

class GzipFile:
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened (and therefore owned and closed) by this instance;
    # stays None when the caller supplied fileobj.
    myfileobj = None
    # Upper bound on the size of a single read from the underlying file.
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a
        file.  It defaults to None, in which case filename is opened to
        provide a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the stream when compressing.  All gzip compressed streams
        are required to contain a timestamp.  If omitted or None, the
        current time is used.  This module ignores the timestamp when
        decompressing; however, some programs, such as gunzip, make use
        of it.  The format of the timestamp is the same as that of the
        return value of time.time() and of the st_mtime member of the
        object returned by os.stat().

        Raises IOError if mode does not start with 'r', 'w' or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            filename = getattr(fileobj, 'name', '')
            # fileobj.name may be something other than a path (for
            # example an integer file descriptor); such a value cannot
            # be stored in the header, so fall back to "no name".
            if not isinstance(filename, (str, bytes)):
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            self.extrabuf = b""
            self.extrasize = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate stream, the gzip header and
            # trailer are written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            # Do not leak a file that was opened a few lines above.
            if self.myfileobj is not None:
                self.myfileobj.close()
                self.myfileobj = None
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        self.offset = 0
        self.mtime = mtime

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias for the name attribute.

        In write mode a ".gz" suffix is appended when name lacks one.
        Accessing the property emits a DeprecationWarning.
        """
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        """Return '<gzip ...>' wrapping the repr of the underlying file."""
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the per-stream write state (name, CRC and size counters)."""
        self.name = filename
        # zlib.crc32() requires a bytes-like object.
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        fname = self.name
        if isinstance(fname, str):
            try:
                # RFC 1952 requires the FNAME field to be Latin-1. Do not
                # include filenames that cannot be represented that way.
                fname = fname.encode('latin-1')
            except UnicodeEncodeError:
                fname = b''
        elif not isinstance(fname, bytes):
            fname = b''
        if fname.endswith(b'.gz'):
            fname = fname[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(bytes([flags]))
        mtime = self.mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        self.fileobj.write(b'\002')                 # extra flags (XFL)
        self.fileobj.write(b'\377')                 # operating system (OS)
        if fname:
            self.fileobj.write(fname + b'\000')

    def _init_read(self):
        """Reset the running CRC and size counters for a new member."""
        # zlib.crc32() requires a bytes-like object.
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Consume a gzip member header from the underlying file.

        Stores the header timestamp in self.mtime and skips every
        optional field.  Raises IOError if the magic number or the
        compression method is not the expected one.
        """
        magic = self.fileobj.read(2)
        if magic != b'\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        self.mtime = read32(self.fileobj)
        # Skip the extra-flags byte and the OS byte; neither is used.
        self.fileobj.read(2)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress data and write it to the underlying file.

        Returns len(data).  Raises IOError (EBADF) if the object was
        opened for reading and ValueError if it has been closed.
        """
        if self.mode != WRITE:
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        length = len(data)
        if length > 0:
            self.size = self.size + length
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.fileobj.write(self.compress.compress(data))
            self.offset += length
        return length

    def read(self, size=-1):
        """Return up to size uncompressed bytes; everything if size < 0.

        Fewer bytes (possibly none) are returned once the end of the
        compressed data has been reached.  Raises IOError (EBADF) if
        the object was opened for writing.
        """
        if self.mode != READ:
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return b''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back to the front of the buffer of decompressed data."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read and decompress up to size compressed bytes into the buffer.

        Raises EOFError when there is no more compressed data.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == b"":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != b"":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer and update CRC and size."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and size stored in the member trailer.

        Raises IOError if either stored value disagrees with what was
        computed while decompressing.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)  # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file opened by the constructor is
        closed; a caller-supplied fileobj is left open.  Calling close()
        more than once is allowed.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Mark the object closed up front so that a failing write below
        # cannot leave it half-open.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def __del__(self):
        """Close the object on garbage collection if it is still open."""
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # __init__ did not get far enough to set the attributes.
            return
        self.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data, then flush the underlying file."""
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        """Return False: a GzipFile is never interactive."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = b""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream and return it.

        whence may be 0 (absolute) or 1 (relative to the current
        position); any other value raises ValueError.  In write mode
        only forward seeks are possible (IOError otherwise) and the gap
        is filled with zero bytes.  In read mode a backward seek
        rewinds and decompresses again from the start.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            chunk = bytes(1024)
            for i in range(count // 1024):
                self.write(chunk)
            self.write(bytes(count % 1024))
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)
        return self.offset

    def readline(self, size=-1):
        """Return the next line, including its trailing newline if any.

        At most size bytes are returned when size is non-negative.
        """
        if size < 0:
            size = sys.maxsize
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find(b'\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == b'':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return b''.join(bufs)     # Return resulting line

    def readlines(self, sizehint=0):
        """Return a list of lines, stopping once sizehint bytes are read.

        A sizehint of zero or less reads all remaining lines.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxsize
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == b"":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every item of the iterable L with write()."""
        for line in L:
            self.write(line)

    def __iter__(self):
        """Return self; iteration yields lines."""
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of data."""
        line = self.readline()
        if line:
            return line
        raise StopIteration

    def __enter__(self):
        """Enter a with block; raises ValueError if already closed."""
        if self.fileobj is None:
            raise ValueError("I/O operation on closed GzipFile object")
        return self

    def __exit__(self, *args):
        """Close the object when the with block ends."""
        self.close()

def _test():
    """Command-line driver: compress or, with -d, decompress the arguments.

    Act like gzip; with -d, act like gunzip.
    The input file is not deleted, however, nor are any other gzip
    options or features supported.  An argument of "-" (the default
    when no file is named) means standard input to standard output.
    """
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer)
                g = sys.stdout.buffer
            else:
                if arg[-3:] != ".gz":
                    print("filename doesn't end in .gz:", repr(arg))
                    continue
                f = open(arg, "rb")
                g = builtins.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin.buffer
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer)
            else:
                f = builtins.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        while True:
            chunk = f.read(1024)
            if not chunk:
                break
            g.write(chunk)
        # The streams in use are the binary buffers, so those are the
        # objects to compare against; the standard streams stay open.
        if g is not sys.stdout.buffer:
            g.close()
        if f is not sys.stdin.buffer:
            f.close()

# Run the command-line driver when the module is executed as a script.
if __name__ == '__main__':
    _test()