"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
Lars Gustäbel1440df22009-10-29 09:39:47 +00008import struct, sys, time, os
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Georg Brandl1a3284e2007-12-02 09:40:06 +000010import builtins
Antoine Pitroub1f88352010-01-03 22:37:40 +000011import io
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +020012import _compression
Guido van Rossum15262191997-04-30 16:04:57 +000013
Antoine Pitrou79c5ef12010-08-17 21:10:05 +000014__all__ = ["GzipFile", "open", "compress", "decompress"]
Skip Montanaro2dd42762001-01-23 15:35:05 +000015
# Bit masks tested against / written to the flag byte of a gzip member header.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ = 1
WRITE = 2
19
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a gzip-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes or
    os.PathLike object), or an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
    binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is
    "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the GzipFile constructor:
    GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
    and newline arguments must not be provided.

    For text mode, a GzipFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError if the mode mixes "t" and "b", or if a text-only
    argument is given in binary mode, and TypeError if filename is neither a
    path nor an object with a read() or write() method.

    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # GzipFile itself only understands binary modes.
    gz_mode = mode.replace("t", "")
    if isinstance(filename, (str, bytes, os.PathLike)):
        binary_file = GzipFile(filename, gz_mode, compresslevel)
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        binary_file = GzipFile(None, gz_mode, compresslevel, filename)
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    if not text_mode:
        return binary_file

    try:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    except BaseException:
        # Wrapping failed (e.g. unknown encoding): do not leave the GzipFile,
        # and any file it opened itself, dangling open.
        binary_file.close()
        raise
63
def write32u(output, value):
    """Write *value* to *output* as a 4-byte little-endian unsigned integer."""
    # struct's "<L" format is an unsigned 32-bit little-endian field.
    packed = struct.pack("<L", value)
    output.write(packed)
68
Antoine Pitrou7b969842010-09-23 16:22:51 +000069class _PaddedFile:
70 """Minimal read-only file object that prepends a string to the contents
71 of an actual file. Shouldn't be used outside of gzip.py, as it lacks
72 essential functionality."""
73
74 def __init__(self, f, prepend=b''):
75 self._buffer = prepend
76 self._length = len(prepend)
77 self.file = f
78 self._read = 0
79
80 def read(self, size):
81 if self._read is None:
82 return self.file.read(size)
83 if self._read + size <= self._length:
84 read = self._read
85 self._read += size
86 return self._buffer[read:self._read]
87 else:
88 read = self._read
89 self._read = None
90 return self._buffer[read:] + \
91 self.file.read(size-self._length+read)
92
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +020093 def prepend(self, prepend=b''):
Antoine Pitrou7b969842010-09-23 16:22:51 +000094 if self._read is None:
95 self._buffer = prepend
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +020096 else: # Assume data was read since the last prepend() call
Antoine Pitrou7b969842010-09-23 16:22:51 +000097 self._read -= len(prepend)
98 return
Antoine Pitrou7b969842010-09-23 16:22:51 +000099 self._length = len(self._buffer)
100 self._read = 0
101
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200102 def seek(self, off):
Antoine Pitrou7b969842010-09-23 16:22:51 +0000103 self._read = None
104 self._buffer = None
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200105 return self.file.seek(off)
Antoine Pitrou7b969842010-09-23 16:22:51 +0000106
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200107 def seekable(self):
108 return True # Allows fast-forwarding even in unseekable streams
Antoine Pitrou7b969842010-09-23 16:22:51 +0000109
class GzipFile(_compression.BaseStream):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open a
    compressed file in text mode, use the gzip.open() function.

    """

    # Overridden with internal file object to be closed, if only a filename
    # is passed in
    myfileobj = None

    # Compression level used to pick the XFL header byte.  Overridden per
    # instance by __init__ when the file is opened for writing.
    _compresslevel = 9

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or
        'xb' depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        A mode of 'r' is equivalent to one of 'rb', and similarly for 'w' and
        'wb', 'a' and 'ab', and 'x' and 'xb'.

        The compresslevel argument is an integer from 0 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression. 0 is no compression
        at all. The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the last modification time field in the stream when compressing.
        If omitted or None, the current time is used.

        Raises ValueError if mode requests text or universal-newline mode, or
        does not start with one of 'r', 'w', 'a' or 'x'.

        """

        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            # Remember the file we opened ourselves so close() can close it.
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            filename = getattr(fileobj, 'name', '')
            if not isinstance(filename, (str, bytes)):
                filename = ''
        else:
            filename = os.fspath(filename)
        if mode is None:
            mode = getattr(fileobj, 'mode', 'rb')

        if mode.startswith('r'):
            self.mode = READ
            raw = _GzipReader(fileobj)
            self._buffer = io.BufferedReader(raw)
            self.name = filename

        elif mode.startswith(('w', 'a', 'x')):
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate data, the gzip header and trailer
            # are written by this class.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
            self._write_mtime = mtime
            self._compresslevel = compresslevel
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        self.fileobj = fileobj

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias of ``name``; in write mode a gzip suffix is added.

        The suffix has the same type (str or bytes) as ``name``.
        """
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE:
            # Pick a suffix of the same type as the name so that bytes
            # filenames do not fail on concatenation.
            suffix = b".gz" if isinstance(self.name, bytes) else ".gz"
            if not self.name.endswith(suffix):
                return self.name + suffix
        return self.name

    @property
    def mtime(self):
        """Last modification time read from stream, or None"""
        return self._buffer.raw._last_mtime

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the bookkeeping used while compressing."""
        self.name = filename
        self.crc = zlib.crc32(b"")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0
        self.offset = 0  # Current file offset for seek(), tell(), etc

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags).encode('latin-1'))
        mtime = self._write_mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        # XFL (extra flags): RFC 1952 defines 2 for "maximum compression" and
        # 4 for "fastest algorithm"; any other level gets no hint.
        if self._compresslevel == 9:
            xfl = b'\002'
        elif self._compresslevel == 1:
            xfl = b'\004'
        else:
            xfl = b'\000'
        self.fileobj.write(xfl)
        self.fileobj.write(b'\377')                 # OS field: unknown
        if fname:
            self.fileobj.write(fname + b'\000')

    def write(self, data):
        """Compress *data* and write it out; return the uncompressed length.

        *data* may be bytes or any object supporting the buffer protocol.
        Raises OSError(EBADF) if the file was opened for reading.
        """
        self._check_not_closed()
        if self.mode != WRITE:
            import errno
            raise OSError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        if isinstance(data, bytes):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        if length > 0:
            self.fileobj.write(self.compress.compress(data))
            self.size += length
            self.crc = zlib.crc32(data, self.crc)
            self.offset += length

        return length

    def read(self, size=-1):
        """Read from the decompressed stream via the internal buffer.

        Raises OSError(EBADF) if the file was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "read() on write-only GzipFile object")
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Implements BufferedIOBase.read1()

        Reads up to a buffer's worth of data if size is negative."""
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "read1() on write-only GzipFile object")

        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def peek(self, n):
        """Delegate to the internal buffer's peek().

        Raises OSError(EBADF) if the file was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
        return self._buffer.peek(n)

    @property
    def closed(self):
        return self.fileobj is None

    def close(self):
        """Finish the stream and release the underlying file.

        In write mode the remaining compressed data and the CRC/size trailer
        are written.  A file object passed in by the caller is left open;
        only a file opened by the constructor itself is closed.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Mark as closed first so a failure below cannot trigger a second
        # attempt to write the trailer.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
            elif self.mode == READ:
                self._buffer.close()
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data to the underlying file (write mode)."""
        self._check_not_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise OSError("Can't rewind in write mode")
        self._buffer.seek(0)

    def readable(self):
        return self.mode == READ

    def writable(self):
        return self.mode == WRITE

    def seekable(self):
        return True

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the position in the uncompressed stream.

        In write mode only forward seeks are possible; the gap is filled by
        writing zero bytes.  In read mode the request is passed to the
        internal buffer.
        """
        if self.mode == WRITE:
            if whence != io.SEEK_SET:
                if whence == io.SEEK_CUR:
                    offset = self.offset + offset
                else:
                    raise ValueError('Seek from end not supported')
            if offset < self.offset:
                raise OSError('Negative seek in write mode')
            count = offset - self.offset
            chunk = b'\0' * 1024
            for i in range(count // 1024):
                self.write(chunk)
            self.write(b'\0' * (count % 1024))
        elif self.mode == READ:
            self._check_not_closed()
            return self._buffer.seek(offset, whence)

        return self.offset

    def readline(self, size=-1):
        """Read one line from the decompressed stream via the internal buffer."""
        self._check_not_closed()
        return self._buffer.readline(size)
375
376
377class _GzipReader(_compression.DecompressReader):
378 def __init__(self, fp):
379 super().__init__(_PaddedFile(fp), zlib.decompressobj,
380 wbits=-zlib.MAX_WBITS)
381 # Set flag indicating start of a new member
382 self._new_member = True
383 self._last_mtime = None
384
385 def _init_read(self):
Martin Panterb82032f2015-12-11 05:19:29 +0000386 self._crc = zlib.crc32(b"")
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200387 self._stream_size = 0 # Decompressed size of unconcatenated stream
388
389 def _read_exact(self, n):
390 '''Read exactly *n* bytes from `self._fp`
391
392 This method is required because self._fp may be unbuffered,
393 i.e. return short reads.
394 '''
395
396 data = self._fp.read(n)
397 while len(data) < n:
398 b = self._fp.read(n - len(data))
399 if not b:
400 raise EOFError("Compressed file ended before the "
401 "end-of-stream marker was reached")
402 data += b
403 return data
404
405 def _read_gzip_header(self):
406 magic = self._fp.read(2)
407 if magic == b'':
408 return False
409
410 if magic != b'\037\213':
411 raise OSError('Not a gzipped file (%r)' % magic)
412
413 (method, flag,
414 self._last_mtime) = struct.unpack("<BBIxx", self._read_exact(8))
415 if method != 8:
416 raise OSError('Unknown compression method')
417
418 if flag & FEXTRA:
419 # Read & discard the extra field, if present
420 extra_len, = struct.unpack("<H", self._read_exact(2))
421 self._read_exact(extra_len)
422 if flag & FNAME:
423 # Read and discard a null-terminated string containing the filename
424 while True:
425 s = self._fp.read(1)
426 if not s or s==b'\000':
427 break
428 if flag & FCOMMENT:
429 # Read and discard a null-terminated string containing a comment
430 while True:
431 s = self._fp.read(1)
432 if not s or s==b'\000':
433 break
434 if flag & FHCRC:
435 self._read_exact(2) # Read & discard the 16-bit header CRC
436 return True
437
438 def read(self, size=-1):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000439 if size < 0:
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200440 return self.readall()
441 # size=0 is special because decompress(max_length=0) is not supported
442 if not size:
443 return b""
Antoine Pitroub1f88352010-01-03 22:37:40 +0000444
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200445 # For certain input data, a single
446 # call to decompress() may not return
447 # any data. In this case, retry until we get some data or reach EOF.
448 while True:
449 if self._decompressor.eof:
450 # Ending case: we've come to the end of a member in the file,
451 # so finish up this member, and read a new gzip header.
452 # Check the CRC and file size, and set the flag so we read
453 # a new member
454 self._read_eof()
455 self._new_member = True
456 self._decompressor = self._decomp_factory(
457 **self._decomp_args)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000458
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200459 if self._new_member:
460 # If the _new_member flag is set, we have to
461 # jump to the next member, if there is one.
462 self._init_read()
463 if not self._read_gzip_header():
464 self._size = self._pos
465 return b""
466 self._new_member = False
Guido van Rossum15262191997-04-30 16:04:57 +0000467
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200468 # Read a chunk of data from the file
469 buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)
470
471 uncompress = self._decompressor.decompress(buf, size)
472 if self._decompressor.unconsumed_tail != b"":
473 self._fp.prepend(self._decompressor.unconsumed_tail)
474 elif self._decompressor.unused_data != b"":
475 # Prepend the already read bytes to the fileobj so they can
476 # be seen by _read_eof() and _read_gzip_header()
477 self._fp.prepend(self._decompressor.unused_data)
478
479 if uncompress != b"":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000480 break
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200481 if buf == b"":
482 raise EOFError("Compressed file ended before the "
483 "end-of-stream marker was reached")
Andrew M. Kuchling41616ee2000-07-29 20:15:26 +0000484
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200485 self._add_read_data( uncompress )
486 self._pos += len(uncompress)
487 return uncompress
Tim Peters07e99cb2001-01-14 23:47:14 +0000488
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200489 def _add_read_data(self, data):
Martin Panterb82032f2015-12-11 05:19:29 +0000490 self._crc = zlib.crc32(data, self._crc)
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200491 self._stream_size = self._stream_size + len(data)
492
493 def _read_eof(self):
494 # We've read to the end of the file
495 # We check the that the computed CRC and size of the
496 # uncompressed data matches the stored values. Note that the size
497 # stored is the true file size mod 2**32.
498 crc32, isize = struct.unpack("<II", self._read_exact(8))
499 if crc32 != self._crc:
500 raise OSError("CRC check failed %s != %s" % (hex(crc32),
501 hex(self._crc)))
502 elif isize != (self._stream_size & 0xffffffff):
503 raise OSError("Incorrect length of data produced")
504
505 # Gzip files can be padded with zeroes and still have archives.
506 # Consume all zero bytes and set the file position to the first
507 # non-zero byte. See http://www.gzip.org/#faq8
508 c = b"\x00"
509 while c == b"\x00":
510 c = self._fp.read(1)
511 if c:
512 self._fp.prepend(c)
513
514 def _rewind(self):
515 super()._rewind()
516 self._new_member = True
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000517
def compress(data, compresslevel=9):
    """Compress data in one shot and return the compressed string.
    Optional argument is the compression level, in range of 0-9.
    """
    sink = io.BytesIO()
    writer = GzipFile(fileobj=sink, mode='wb', compresslevel=compresslevel)
    # Leaving the with-block closes the GzipFile, which completes the stream
    # held in the in-memory sink.
    with writer:
        writer.write(data)
    return sink.getvalue()
526
def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.
    """
    source = io.BytesIO(data)
    reader = GzipFile(fileobj=source)
    with reader:
        return reader.read()
533
534
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000535def _test():
536 # Act like gzip; with -d, act like gunzip.
537 # The input file is not deleted, however, nor are any other gzip
538 # options or features supported.
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000539 args = sys.argv[1:]
540 decompress = args and args[0] == "-d"
541 if decompress:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000542 args = args[1:]
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000543 if not args:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000544 args = ["-"]
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000545 for arg in args:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000546 if decompress:
547 if arg == "-":
Antoine Pitrou9d625c22009-01-04 21:11:10 +0000548 f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer)
549 g = sys.stdout.buffer
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000550 else:
551 if arg[-3:] != ".gz":
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000552 print("filename doesn't end in .gz:", repr(arg))
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000553 continue
554 f = open(arg, "rb")
Georg Brandl1a3284e2007-12-02 09:40:06 +0000555 g = builtins.open(arg[:-3], "wb")
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000556 else:
557 if arg == "-":
Antoine Pitrou9d625c22009-01-04 21:11:10 +0000558 f = sys.stdin.buffer
559 g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000560 else:
Georg Brandl1a3284e2007-12-02 09:40:06 +0000561 f = builtins.open(arg, "rb")
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000562 g = open(arg + ".gz", "wb")
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000563 while True:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000564 chunk = f.read(1024)
565 if not chunk:
566 break
567 g.write(chunk)
Antoine Pitrouecc47572012-08-30 00:29:24 +0200568 if g is not sys.stdout.buffer:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000569 g.close()
Antoine Pitrouecc47572012-08-30 00:29:24 +0200570 if f is not sys.stdin.buffer:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000571 f.close()
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000572
573if __name__ == '__main__':
574 _test()