blob: 9a4e0f9c00c580ddf3a2ce1ba2f7e7445afd648d [file] [log] [blame]
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00001"""Functions that read and write gzipped files.
2
Guido van Rossum54f22ed2000-02-04 15:10:34 +00003The user of the file doesn't have to worry about the compression,
4but random access is not allowed."""
5
6# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
Lars Gustäbel1440df22009-10-29 09:39:47 +00008import struct, sys, time, os
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Georg Brandl1a3284e2007-12-02 09:40:06 +000010import builtins
Antoine Pitroub1f88352010-01-03 22:37:40 +000011import io
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +020012import _compression
Guido van Rossum15262191997-04-30 16:04:57 +000013
# Public names exported by ``from gzip import *``.
__all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"]

# Bit masks for the flag byte of a gzip member header.  The reader tests them
# with ``flag & X``; the writer sets FNAME when a file name is stored.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode to record how the stream was opened.
READ, WRITE = 1, 2

# Named compression levels, used as default arguments and selected by the
# command-line --fast / --best switches.
_COMPRESS_LEVEL_FAST = 1
_COMPRESS_LEVEL_TRADEOFF = 6
_COMPRESS_LEVEL_BEST = 9
23
24
def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
         encoding=None, errors=None, newline=None):
    """Open a gzip-compressed file in binary or text mode.

    *filename* is either a path (str, bytes or os.PathLike) or an existing
    file object, recognised by having a ``read`` or ``write`` attribute.

    *mode* is one of "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for binary
    mode, or "rt", "wt", "xt" or "at" for text mode.  The default mode is
    "rb", and the default compresslevel is 9.

    In binary mode the result is GzipFile(filename, mode, compresslevel) and
    *encoding*, *errors* and *newline* must be left as None.

    In text mode the GzipFile is wrapped in an io.TextIOWrapper configured
    with the given encoding, error handling and line ending(s).

    Raises ValueError for a mode containing both "t" and "b", or for text
    arguments given in binary mode, and TypeError when *filename* is neither
    a path nor a file object.
    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # Text-only arguments are rejected in binary mode, checked in order.
        text_only_args = (
            (encoding, "Argument 'encoding' not supported in binary mode"),
            (errors, "Argument 'errors' not supported in binary mode"),
            (newline, "Argument 'newline' not supported in binary mode"),
        )
        for value, message in text_only_args:
            if value is not None:
                raise ValueError(message)

    # GzipFile itself only understands binary modes.
    inner_mode = mode.replace("t", "")
    if isinstance(filename, (str, bytes, os.PathLike)):
        gz_file = GzipFile(filename, inner_mode, compresslevel)
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        gz_file = GzipFile(None, inner_mode, compresslevel, filename)
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    if not text_mode:
        return gz_file
    encoding = io.text_encoding(encoding)
    return io.TextIOWrapper(gz_file, encoding, errors, newline)
69
def write32u(output, value):
    """Write *value* to *output* as a 4-byte little-endian unsigned integer.

    *output* only needs a ``write`` method accepting bytes.
    """
    packed = struct.pack("<L", value)
    output.write(packed)
74
class _PaddedFile:
    """Minimal read-only file wrapper that serves a prepended byte string
    before the contents of the real file.

    Intended for use inside gzip.py only; it implements just read(),
    prepend(), seek() and seekable().
    """

    def __init__(self, f, prepend=b''):
        self.file = f
        self._buffer = prepend
        self._length = len(prepend)
        # Offset of the next unread byte in _buffer; None once the buffer has
        # been exhausted and reads go straight to the file.
        self._read = 0

    def read(self, size):
        """Return up to *size* bytes, draining the prepended buffer first."""
        start = self._read
        if start is None:
            return self.file.read(size)

        stop = start + size
        if stop <= self._length:
            # Request is satisfied entirely from the buffer.
            self._read = stop
            return self._buffer[start:stop]

        # Buffer runs out: hand back its remainder plus the shortfall read
        # from the underlying file.
        self._read = None
        return self._buffer[start:] + self.file.read(stop - self._length)

    def prepend(self, prepend=b''):
        """Push *prepend* back so that it is returned by the next read()."""
        if self._read is not None:
            # Assume data was read since the last prepend() call, so the
            # bytes are still in the buffer: just move the offset back.
            self._read -= len(prepend)
            return
        self._buffer = prepend
        self._length = len(prepend)
        self._read = 0

    def seek(self, off):
        """Drop any buffered bytes and seek the underlying file to *off*."""
        self._read = self._buffer = None
        return self.file.seek(off)

    def seekable(self):
        # Allows fast-forwarding even in unseekable streams
        return True
Antoine Pitrou7b969842010-09-23 16:22:51 +0000115
Zackery Spytzcf599f62019-05-13 01:50:52 -0600116
class BadGzipFile(OSError):
    """Exception raised in some cases for invalid gzip files.

    In this module it is raised by _GzipReader when a member has the wrong
    magic bytes, an unknown compression method, a CRC mismatch or an
    incorrect stored length.
    """
119
120
class GzipFile(_compression.BaseStream):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open a
    compressed file in text mode, use the gzip.open() function.

    """

    # Overridden with internal file object to be closed, if only a filename
    # is passed in
    myfileobj = None

    def __init__(self, filename=None, mode=None,
                 compresslevel=_COMPRESS_LEVEL_BEST, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or
        'xb' depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        A mode of 'r' is equivalent to one of 'rb', and similarly for 'w' and
        'wb', 'a' and 'ab', and 'x' and 'xb'.

        The compresslevel argument is an integer from 0 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression. 0 is no compression
        at all. The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the last modification time field in the stream when compressing.
        If omitted or None, the current time is used.

        Raises ValueError for a text ('t'/'U') or otherwise unsupported mode.
        If construction fails after this constructor opened *filename*
        itself, that file is closed again before the exception propagates.

        """

        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        try:
            if filename is None:
                filename = getattr(fileobj, 'name', '')
                if not isinstance(filename, (str, bytes)):
                    filename = ''
            else:
                filename = os.fspath(filename)
            # Remember whether the caller chose the mode explicitly; writing
            # with an inferred mode triggers the FutureWarning below.
            origmode = mode
            if mode is None:
                mode = getattr(fileobj, 'mode', 'rb')

            if mode.startswith('r'):
                self.mode = READ
                raw = _GzipReader(fileobj)
                self._buffer = io.BufferedReader(raw)
                self.name = filename

            elif mode.startswith(('w', 'a', 'x')):
                if origmode is None:
                    import warnings
                    warnings.warn(
                        "GzipFile was opened for writing, but this will "
                        "change in future Python releases.  "
                        "Specify the mode argument for opening it for writing.",
                        FutureWarning, 2)
                self.mode = WRITE
                self._init_write(filename)
                # Negative wbits: raw deflate data, the gzip header and
                # trailer are written by this class itself.
                self.compress = zlib.compressobj(compresslevel,
                                                 zlib.DEFLATED,
                                                 -zlib.MAX_WBITS,
                                                 zlib.DEF_MEM_LEVEL,
                                                 0)
                self._write_mtime = mtime
            else:
                raise ValueError("Invalid mode: {!r}".format(mode))

            self.fileobj = fileobj

            if self.mode == WRITE:
                self._write_gzip_header(compresslevel)
        except BaseException:
            # Setup failed (e.g. invalid compresslevel or a failing header
            # write).  Mark the object closed and release the file we opened
            # ourselves; a caller-supplied fileobj is left untouched.
            self.fileobj = None
            opened, self.myfileobj = self.myfileobj, None
            if opened is not None:
                opened.close()
            raise

    def _check_mode(self, required, message):
        """Raise OSError(EBADF, message) unless the stream mode is *required*."""
        if self.mode != required:
            import errno
            raise OSError(errno.EBADF, message)

    @property
    def filename(self):
        """Deprecated alias of ``name`` (with '.gz' appended in write mode)."""
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        name = self.name
        # name may be str or bytes; use a suffix of the matching type so the
        # comparison and concatenation below are valid for both.
        suffix = b".gz" if isinstance(name, bytes) else ".gz"
        if self.mode == WRITE and name[-3:] != suffix:
            return name + suffix
        return name

    @property
    def mtime(self):
        """Last modification time read from stream, or None"""
        if self.mode != READ:
            # Nothing is read from the stream in write mode.
            return None
        return self._buffer.raw._last_mtime

    def __repr__(self):
        s = repr(self.fileobj)
        # Drop the first and last character of the fileobj repr (normally the
        # enclosing '<' and '>') and wrap it in our own.
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Initialise the bookkeeping used while writing a gzip member."""
        self.name = filename
        self.crc = zlib.crc32(b"")   # running CRC-32 of the uncompressed data
        self.size = 0                # number of uncompressed bytes written
        # writebuf/bufsize are not used elsewhere in this class; they are kept
        # in case external code reads them.
        self.writebuf = []
        self.bufsize = 0
        self.offset = 0  # Current file offset for seek(), tell(), etc

    def _write_gzip_header(self, compresslevel):
        """Write the gzip member header to self.fileobj.

        The stored file name is the basename of self.name without a trailing
        '.gz'; it is omitted when empty or not encodable as Latin-1.
        """
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(bytes([flags]))
        mtime = self._write_mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        # Extra-flags byte: records whether the best or the fastest
        # compression level was requested.
        if compresslevel == _COMPRESS_LEVEL_BEST:
            xfl = b'\002'
        elif compresslevel == _COMPRESS_LEVEL_FAST:
            xfl = b'\004'
        else:
            xfl = b'\000'
        self.fileobj.write(xfl)
        self.fileobj.write(b'\377')                 # OS byte
        if fname:
            self.fileobj.write(fname + b'\000')

    def write(self, data):
        """Compress *data* and write it; return the number of input bytes.

        *data* may be bytes or any object supporting the buffer protocol.
        Raises OSError(EBADF) if the stream was opened for reading.
        """
        self._check_not_closed()
        self._check_mode(WRITE, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        if isinstance(data, bytes):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        if length > 0:
            self.fileobj.write(self.compress.compress(data))
            self.size += length
            self.crc = zlib.crc32(data, self.crc)
            self.offset += length

        return length

    def read(self, size=-1):
        """Read and return up to *size* uncompressed bytes (all if negative)."""
        self._check_not_closed()
        self._check_mode(READ, "read() on write-only GzipFile object")
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Implements BufferedIOBase.read1()

        Reads up to a buffer's worth of data if size is negative."""
        self._check_not_closed()
        self._check_mode(READ, "read1() on write-only GzipFile object")

        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def peek(self, n):
        """Return buffered uncompressed data without advancing the position."""
        self._check_not_closed()
        self._check_mode(READ, "peek() on write-only GzipFile object")
        return self._buffer.peek(n)

    @property
    def closed(self):
        return self.fileobj is None

    def close(self):
        """Finish the stream and close any file opened by the constructor.

        In write mode the compressor is flushed and the CRC-32 / size trailer
        is written.  A caller-supplied fileobj is not closed.  Calling
        close() more than once is harmless.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2 GiB, or even 4 GiB
                write32u(fileobj, self.size & 0xffffffff)
            elif self.mode == READ:
                self._buffer.close()
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """In write mode, flush the compressor (using *zlib_mode*) and the
        underlying file; in read mode only the closed check is performed."""
        self._check_not_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise OSError("Can't rewind in write mode")
        self._buffer.seek(0)

    def readable(self):
        return self.mode == READ

    def writable(self):
        return self.mode == WRITE

    def seekable(self):
        return True

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the uncompressed stream position and return it.

        In write mode only forward seeks are possible; they are emulated by
        writing zero bytes, and seeking from the end is not supported.  In
        read mode the request is delegated to the buffered reader.
        """
        if self.mode == WRITE:
            if whence != io.SEEK_SET:
                if whence == io.SEEK_CUR:
                    offset = self.offset + offset
                else:
                    raise ValueError('Seek from end not supported')
            if offset < self.offset:
                raise OSError('Negative seek in write mode')
            count = offset - self.offset
            chunk = b'\0' * 1024
            for i in range(count // 1024):
                self.write(chunk)
            self.write(b'\0' * (count % 1024))
        elif self.mode == READ:
            self._check_not_closed()
            return self._buffer.seek(offset, whence)

        return self.offset

    def readline(self, size=-1):
        """Read one line of uncompressed data (read mode only)."""
        self._check_not_closed()
        self._check_mode(READ, "readline() on write-only GzipFile object")
        return self._buffer.readline(size)

    def __iter__(self):
        self._check_not_closed()
        self._check_mode(READ, "__iter__() on write-only GzipFile object")
        return self._buffer.__iter__()
404
Antoine Pitrou2dbc6e62015-04-11 00:31:01 +0200405
class _GzipReader(_compression.DecompressReader):
    """Raw reader that decompresses a gzip stream, member after member.

    The source file is wrapped in a _PaddedFile so that bytes read beyond the
    end of a deflate stream can be pushed back and re-read as trailer or next
    header.  Attributes such as ``_fp``, ``_decompressor``, ``_decomp_factory``,
    ``_decomp_args``, ``_pos`` and ``_size`` are provided by
    _compression.DecompressReader, which is defined outside this file.
    """

    def __init__(self, fp):
        # Negative wbits: raw deflate data; headers and trailers are parsed
        # by this class rather than by zlib.
        super().__init__(_PaddedFile(fp), zlib.decompressobj,
                         wbits=-zlib.MAX_WBITS)
        # Set flag indicating start of a new member
        self._new_member = True
        # mtime field of the most recently parsed member header
        self._last_mtime = None

    def _init_read(self):
        """Reset the running CRC-32 and size for the member about to be read."""
        self._crc = zlib.crc32(b"")
        self._stream_size = 0  # Decompressed size of unconcatenated stream

    def _read_exact(self, n):
        '''Read exactly *n* bytes from `self._fp`

        This method is required because self._fp may be unbuffered,
        i.e. return short reads.

        Raises EOFError if the file ends before *n* bytes were read.
        '''

        data = self._fp.read(n)
        while len(data) < n:
            b = self._fp.read(n - len(data))
            if not b:
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")
            data += b
        return data

    def _read_gzip_header(self):
        """Consume one member header from self._fp.

        Returns False when no bytes are left (clean end of file) and True
        once a header has been read.  Raises BadGzipFile for wrong magic
        bytes or a compression method other than 8, and EOFError (from
        _read_exact) when a fixed-size field is truncated.
        """
        magic = self._fp.read(2)
        if magic == b'':
            return False

        if magic != b'\037\213':
            raise BadGzipFile('Not a gzipped file (%r)' % magic)

        # method (1 byte), flags (1 byte), mtime (4 bytes, little-endian);
        # the two trailing header bytes are skipped.
        (method, flag,
         self._last_mtime) = struct.unpack("<BBIxx", self._read_exact(8))
        if method != 8:
            raise BadGzipFile('Unknown compression method')

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            extra_len, = struct.unpack("<H", self._read_exact(2))
            self._read_exact(extra_len)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            # (end of file also ends the loop)
            while True:
                s = self._fp.read(1)
                if not s or s==b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            # (end of file also ends the loop)
            while True:
                s = self._fp.read(1)
                if not s or s==b'\000':
                    break
        if flag & FHCRC:
            self._read_exact(2)     # Read & discard the 16-bit header CRC
        return True

    def read(self, size=-1):
        """Return up to *size* decompressed bytes; b"" at end of file.

        A negative *size* reads everything via readall().  Raises EOFError
        when the compressed data ends inside a member, and BadGzipFile (from
        the header/trailer checks) for corrupt members.
        """
        if size < 0:
            return self.readall()
        # size=0 is special because decompress(max_length=0) is not supported
        if not size:
            return b""

        # For certain input data, a single
        # call to decompress() may not return
        # any data. In this case, retry until we get some data or reach EOF.
        while True:
            if self._decompressor.eof:
                # Ending case: we've come to the end of a member in the file,
                # so finish up this member, and read a new gzip header.
                # Check the CRC and file size, and set the flag so we read
                # a new member
                self._read_eof()
                self._new_member = True
                self._decompressor = self._decomp_factory(
                    **self._decomp_args)

            if self._new_member:
                # If the _new_member flag is set, we have to
                # jump to the next member, if there is one.
                self._init_read()
                if not self._read_gzip_header():
                    # No further member: record the total decompressed size.
                    self._size = self._pos
                    return b""
                self._new_member = False

            # Read a chunk of data from the file
            buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)

            uncompress = self._decompressor.decompress(buf, size)
            if self._decompressor.unconsumed_tail != b"":
                # Input not yet processed because of the size limit: push it
                # back so the next iteration/call feeds it again.
                self._fp.prepend(self._decompressor.unconsumed_tail)
            elif self._decompressor.unused_data != b"":
                # Prepend the already read bytes to the fileobj so they can
                # be seen by _read_eof() and _read_gzip_header()
                self._fp.prepend(self._decompressor.unused_data)

            if uncompress != b"":
                break
            if buf == b"":
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

        self._add_read_data( uncompress )
        self._pos += len(uncompress)
        return uncompress

    def _add_read_data(self, data):
        """Fold *data* into the current member's running CRC-32 and size."""
        self._crc = zlib.crc32(data, self._crc)
        self._stream_size = self._stream_size + len(data)

    def _read_eof(self):
        """Read and verify a member trailer, then skip zero padding.

        Raises BadGzipFile if the stored CRC-32 or length does not match the
        values computed while decompressing.
        """
        # We've read to the end of the file
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        crc32, isize = struct.unpack("<II", self._read_exact(8))
        if crc32 != self._crc:
            raise BadGzipFile("CRC check failed %s != %s" % (hex(crc32),
                                                             hex(self._crc)))
        elif isize != (self._stream_size & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")

        # Gzip files can be padded with zeroes and still have archives.
        # Consume all zero bytes and set the file position to the first
        # non-zero byte. See http://www.gzip.org/#faq8
        c = b"\x00"
        while c == b"\x00":
            c = self._fp.read(1)
        if c:
            # Push the first non-zero byte back for the next header read.
            self._fp.prepend(c)

    def _rewind(self):
        """Rewind via the base class and expect a member header again."""
        super()._rewind()
        self._new_member = True
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000546
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.

    *compresslevel* is the compression level, in range of 0-9.  *mtime* is
    passed through to GzipFile as the timestamp for the stream header.
    """
    sink = io.BytesIO()
    writer = GzipFile(fileobj=sink, mode='wb',
                      compresslevel=compresslevel, mtime=mtime)
    # Leaving the with-block closes the GzipFile, completing the stream in
    # the in-memory buffer.
    with writer as gz:
        gz.write(data)
    return sink.getvalue()
555
def decompress(data):
    """Decompress a gzip compressed string in one shot.

    Return the decompressed string.
    """
    source = io.BytesIO(data)
    with GzipFile(fileobj=source) as reader:
        return reader.read()
562
563
def main():
    """Command-line interface: act like gzip/gunzip but keep the input file.

    Each positional argument is a file name, or "-" for stdin/stdout (the
    default).  Without -d the input is compressed to ``<name>.gz``; with -d
    the name must end in ".gz" and is decompressed to the name without that
    suffix.  --fast / --best select the compression level for every
    compressed output; otherwise the trade-off level is used.

    Every file opened for an argument is closed again when that argument is
    done, including when reading or writing fails.
    """
    from argparse import ArgumentParser
    from contextlib import ExitStack
    parser = ArgumentParser(description=
        "A simple command line interface for the gzip module: act like gzip, "
        "but do not delete the input file.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fast', action='store_true', help='compress faster')
    group.add_argument('--best', action='store_true', help='compress better')
    group.add_argument("-d", "--decompress", action="store_true",
                       help="act like gunzip instead of gzip")

    parser.add_argument("args", nargs="*", default=["-"], metavar='file')
    args = parser.parse_args()

    compresslevel = _COMPRESS_LEVEL_TRADEOFF
    if args.fast:
        compresslevel = _COMPRESS_LEVEL_FAST
    elif args.best:
        compresslevel = _COMPRESS_LEVEL_BEST

    for arg in args.args:
        # The stack closes what was opened for this argument in reverse
        # order (output before input), even if an error interrupts the copy.
        # sys.stdin.buffer / sys.stdout.buffer are never registered, so they
        # stay open.
        with ExitStack() as stack:
            if args.decompress:
                if arg == "-":
                    f = stack.enter_context(
                        GzipFile(filename="", mode="rb",
                                 fileobj=sys.stdin.buffer))
                    g = sys.stdout.buffer
                else:
                    if arg[-3:] != ".gz":
                        sys.exit(f"filename doesn't end in .gz: {arg!r}")
                    f = stack.enter_context(open(arg, "rb"))
                    g = stack.enter_context(builtins.open(arg[:-3], "wb"))
            else:
                if arg == "-":
                    f = sys.stdin.buffer
                    g = stack.enter_context(
                        GzipFile(filename="", mode="wb",
                                 fileobj=sys.stdout.buffer,
                                 compresslevel=compresslevel))
                else:
                    f = stack.enter_context(builtins.open(arg, "rb"))
                    # Honour --fast/--best for named files too, not only
                    # for stdin.
                    g = stack.enter_context(
                        open(arg + ".gz", "wb", compresslevel=compresslevel))
            while True:
                chunk = f.read(io.DEFAULT_BUFFER_SIZE)
                if not chunk:
                    break
                g.write(chunk)
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000611
# Run the command-line interface when the module is executed as a script.
if __name__ == '__main__':
    main()