"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

import struct
import sys
import time
import os
import zlib
import builtins
import io
import _compression

# Public names exported by ``from gzip import *``.
__all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"]

# Bit masks tested against the flag byte of a gzip member header.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ, WRITE = 1, 2

# Compression levels used as defaults and by the command line interface.
_COMPRESS_LEVEL_FAST = 1
_COMPRESS_LEVEL_TRADEOFF = 6
_COMPRESS_LEVEL_BEST = 9


def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
         encoding=None, errors=None, newline=None):
    """Open a gzip-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes or
    os.PathLike object), or an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
    binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is
    "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the GzipFile constructor:
    GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
    and newline arguments must not be provided.

    For text mode, a GzipFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError for a mode containing both "t" and "b", or when a
    text-only argument is given in binary mode, and TypeError when filename
    is neither a path nor an object with a read() or write() method.

    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # GzipFile itself only understands binary modes; text handling is layered
    # on top with a TextIOWrapper below.
    gz_mode = mode.replace("t", "")
    if isinstance(filename, (str, bytes, os.PathLike)):
        binary_file = GzipFile(filename, gz_mode, compresslevel)
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        binary_file = GzipFile(None, gz_mode, compresslevel, filename)
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    if "t" in mode:
        encoding = io.text_encoding(encoding)
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file

def write32u(output, value):
    """Write *value* to *output* as a 4-byte little-endian unsigned integer.

    *output* is any object with a write() method accepting bytes.
    *value* must fit in an unsigned 32-bit integer; struct.pack() raises
    struct.error otherwise, so callers mask larger values beforehand.
    """
    output.write(struct.pack("<L", value))

class _PaddedFile:
    """Minimal read-only file object that prepends a string to the contents
    of an actual file. Shouldn't be used outside of gzip.py, as it lacks
    essential functionality."""

    def __init__(self, f, prepend=b''):
        # _buffer holds the prepended bytes, _length their count, and _read
        # the position inside _buffer.  _read is None once the buffer has
        # been exhausted and reads go straight to the wrapped file.
        self._buffer = prepend
        self._length = len(prepend)
        self.file = f
        self._read = 0

    def read(self, size):
        """Return up to *size* bytes, serving the prepended buffer first."""
        if self._read is None:
            return self.file.read(size)
        if self._read + size <= self._length:
            # The request is satisfied entirely from the buffer.
            read = self._read
            self._read += size
            return self._buffer[read:self._read]
        else:
            # Drain the rest of the buffer and top up from the file.
            read = self._read
            self._read = None
            return self._buffer[read:] + \
                   self.file.read(size-self._length+read)

    def prepend(self, prepend=b''):
        """Push *prepend* back so that it is returned by the next read()."""
        if self._read is None:
            self._buffer = prepend
        else:  # Assume data was read since the last prepend() call
            self._read -= len(prepend)
            return
        self._length = len(self._buffer)
        self._read = 0

    def seek(self, off):
        """Discard any prepended data and seek the wrapped file to *off*."""
        self._read = None
        self._buffer = None
        return self.file.seek(off)

    def seekable(self):
        return True  # Allows fast-forwarding even in unseekable streams


class BadGzipFile(OSError):
    """Exception raised in some cases for invalid gzip files."""


class GzipFile(_compression.BaseStream):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open a
    compressed file in text mode, use the gzip.open() function.

    """

    # Overridden with internal file object to be closed, if only a filename
    # is passed in
    myfileobj = None

    def __init__(self, filename=None, mode=None,
                 compresslevel=_COMPRESS_LEVEL_BEST, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or
        'xb' depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        A mode of 'r' is equivalent to one of 'rb', and similarly for 'w' and
        'wb', 'a' and 'ab', and 'x' and 'xb'.

        The compresslevel argument is an integer from 0 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression. 0 is no compression
        at all. The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the last modification time field in the stream when compressing.
        If omitted or None, the current time is used.

        """

        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            # Remember the file we opened ourselves so close() can close it.
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            filename = getattr(fileobj, 'name', '')
            if not isinstance(filename, (str, bytes)):
                filename = ''
        else:
            filename = os.fspath(filename)
        # Keep the caller-supplied mode so we can warn when a write mode was
        # only inferred from fileobj.
        origmode = mode
        if mode is None:
            mode = getattr(fileobj, 'mode', 'rb')

        if mode.startswith('r'):
            self.mode = READ
            raw = _GzipReader(fileobj)
            self._buffer = io.BufferedReader(raw)
            self.name = filename

        elif mode.startswith(('w', 'a', 'x')):
            if origmode is None:
                import warnings
                warnings.warn(
                    "GzipFile was opened for writing, but this will "
                    "change in future Python releases. "
                    "Specify the mode argument for opening it for writing.",
                    FutureWarning, 2)
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: produce a raw deflate stream; the gzip header
            # and trailer are written by this class.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
            self._write_mtime = mtime
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        self.fileobj = fileobj

        if self.mode == WRITE:
            self._write_gzip_header(compresslevel)

    @property
    def filename(self):
        """Deprecated alias of ``name`` (with ".gz" appended in write mode)."""
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    @property
    def mtime(self):
        """Last modification time read from stream, or None"""
        return self._buffer.raw._last_mtime

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the bookkeeping used while writing a gzip member."""
        self.name = filename
        self.crc = zlib.crc32(b"")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0
        self.offset = 0  # Current file offset for seek(), tell(), etc

    def _write_gzip_header(self, compresslevel):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags).encode('latin-1'))
        mtime = self._write_mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        # Extra-flags byte: RFC 1952 defines 2 as "maximum compression" and
        # 4 as "fastest algorithm"; anything else is written as 0.
        if compresslevel == _COMPRESS_LEVEL_BEST:
            xfl = b'\002'
        elif compresslevel == _COMPRESS_LEVEL_FAST:
            xfl = b'\004'
        else:
            xfl = b'\000'
        self.fileobj.write(xfl)
        self.fileobj.write(b'\377')                 # OS byte: 255 (unknown)
        if fname:
            self.fileobj.write(fname + b'\000')

    def write(self, data):
        """Compress *data* and write it out; return the uncompressed length.

        Raises OSError (EBADF) when the object was opened for reading.
        """
        self._check_not_closed()
        if self.mode != WRITE:
            import errno
            raise OSError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        if isinstance(data, bytes):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        if length > 0:
            self.fileobj.write(self.compress.compress(data))
            self.size += length
            self.crc = zlib.crc32(data, self.crc)
            self.offset += length

        return length

    def read(self, size=-1):
        """Read and return up to *size* uncompressed bytes.

        Raises OSError (EBADF) when the object was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "read() on write-only GzipFile object")
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Implements BufferedIOBase.read1()

        Reads up to a buffer's worth of data if size is negative."""
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "read1() on write-only GzipFile object")

        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def peek(self, n):
        """Delegate to the internal buffered reader's peek().

        Raises OSError (EBADF) when the object was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
        return self._buffer.peek(n)

    @property
    def closed(self):
        return self.fileobj is None

    def close(self):
        """Finish the stream and release the file opened by the constructor.

        In write mode the remaining compressed data and the trailer (CRC32
        and size) are written.  A file object supplied by the caller is left
        open; only a file opened from a filename is closed.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Mark as closed first so a failure below cannot lead to a second
        # attempt at writing the trailer.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2 GiB, or even 4 GiB
                write32u(fileobj, self.size & 0xffffffff)
            elif self.mode == READ:
                self._buffer.close()
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data to the underlying file (write mode)."""
        self._check_not_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise OSError("Can't rewind in write mode")
        self._buffer.seek(0)

    def readable(self):
        return self.mode == READ

    def writable(self):
        return self.mode == WRITE

    def seekable(self):
        return True

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the uncompressed stream position and return the new offset.

        In write mode only forward seeks are possible; the gap is filled by
        writing zero bytes.  In read mode the call is delegated to the
        internal buffered reader.
        """
        if self.mode == WRITE:
            if whence != io.SEEK_SET:
                if whence == io.SEEK_CUR:
                    offset = self.offset + offset
                else:
                    raise ValueError('Seek from end not supported')
            if offset < self.offset:
                raise OSError('Negative seek in write mode')
            count = offset - self.offset
            chunk = b'\0' * 1024
            for i in range(count // 1024):
                self.write(chunk)
            self.write(b'\0' * (count % 1024))
        elif self.mode == READ:
            self._check_not_closed()
            return self._buffer.seek(offset, whence)

        return self.offset

    def readline(self, size=-1):
        """Read one line (up to *size* bytes) through the buffered reader."""
        self._check_not_closed()
        return self._buffer.readline(size)


class _GzipReader(_compression.DecompressReader):
    """Raw reader that decompresses a (possibly multi-member) gzip stream.

    The file object is wrapped in a _PaddedFile so that bytes read past the
    end of a member can be pushed back and seen by the trailer and header
    parsing code.
    """

    def __init__(self, fp):
        super().__init__(_PaddedFile(fp), zlib.decompressobj,
                         wbits=-zlib.MAX_WBITS)
        # Set flag indicating start of a new member
        self._new_member = True
        self._last_mtime = None

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self._crc = zlib.crc32(b"")
        self._stream_size = 0  # Decompressed size of unconcatenated stream

    def _read_exact(self, n):
        '''Read exactly *n* bytes from `self._fp`

        This method is required because self._fp may be unbuffered,
        i.e. return short reads.
        '''

        data = self._fp.read(n)
        while len(data) < n:
            b = self._fp.read(n - len(data))
            if not b:
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")
            data += b
        return data

    def _read_gzip_header(self):
        """Parse a member header; return False if the stream is exhausted.

        Raises BadGzipFile for a wrong magic number or an unknown
        compression method.  The mtime field is stored in _last_mtime; the
        optional extra, filename, comment and header-CRC fields are skipped.
        """
        magic = self._fp.read(2)
        if magic == b'':
            return False

        if magic != b'\037\213':
            raise BadGzipFile('Not a gzipped file (%r)' % magic)

        (method, flag,
         self._last_mtime) = struct.unpack("<BBIxx", self._read_exact(8))
        if method != 8:
            raise BadGzipFile('Unknown compression method')

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            extra_len, = struct.unpack("<H", self._read_exact(2))
            self._read_exact(extra_len)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self._fp.read(1)
                if not s or s==b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self._fp.read(1)
                if not s or s==b'\000':
                    break
        if flag & FHCRC:
            self._read_exact(2)     # Read & discard the 16-bit header CRC
        return True

    def read(self, size=-1):
        """Return up to *size* decompressed bytes, or b"" at end of stream.

        A negative *size* reads everything via readall().  Raises EOFError
        if the file ends in the middle of a member.
        """
        if size < 0:
            return self.readall()
        # size=0 is special because decompress(max_length=0) is not supported
        if not size:
            return b""

        # For certain input data, a single
        # call to decompress() may not return
        # any data. In this case, retry until we get some data or reach EOF.
        while True:
            if self._decompressor.eof:
                # Ending case: we've come to the end of a member in the file,
                # so finish up this member, and read a new gzip header.
                # Check the CRC and file size, and set the flag so we read
                # a new member
                self._read_eof()
                self._new_member = True
                self._decompressor = self._decomp_factory(
                    **self._decomp_args)

            if self._new_member:
                # If the _new_member flag is set, we have to
                # jump to the next member, if there is one.
                self._init_read()
                if not self._read_gzip_header():
                    self._size = self._pos
                    return b""
                self._new_member = False

            # Read a chunk of data from the file
            buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)

            uncompress = self._decompressor.decompress(buf, size)
            if self._decompressor.unconsumed_tail != b"":
                self._fp.prepend(self._decompressor.unconsumed_tail)
            elif self._decompressor.unused_data != b"":
                # Prepend the already read bytes to the fileobj so they can
                # be seen by _read_eof() and _read_gzip_header()
                self._fp.prepend(self._decompressor.unused_data)

            if uncompress != b"":
                break
            if buf == b"":
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

        self._add_read_data( uncompress )
        self._pos += len(uncompress)
        return uncompress

    def _add_read_data(self, data):
        """Fold *data* into the running CRC and size of the current member."""
        self._crc = zlib.crc32(data, self._crc)
        self._stream_size = self._stream_size + len(data)

    def _read_eof(self):
        # We've read to the end of the file
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        crc32, isize = struct.unpack("<II", self._read_exact(8))
        if crc32 != self._crc:
            raise BadGzipFile("CRC check failed %s != %s" % (hex(crc32),
                                                             hex(self._crc)))
        elif isize != (self._stream_size & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")

        # Gzip files can be padded with zeroes and still have archives.
        # Consume all zero bytes and set the file position to the first
        # non-zero byte. See http://www.gzip.org/#faq8
        c = b"\x00"
        while c == b"\x00":
            c = self._fp.read(1)
        if c:
            self._fp.prepend(c)

    def _rewind(self):
        super()._rewind()
        self._new_member = True

def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.
    Optional argument is the compression level, in range of 0-9.

    The keyword-only mtime argument is passed through to GzipFile as the
    header timestamp.
    """
    buf = io.BytesIO()
    with GzipFile(fileobj=buf, mode='wb', compresslevel=compresslevel, mtime=mtime) as f:
        f.write(data)
    return buf.getvalue()

def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.
    """
    with GzipFile(fileobj=io.BytesIO(data)) as f:
        return f.read()


def main():
    """Command line interface: compress or decompress the named files.

    Each argument is processed in turn; "-" stands for stdin/stdout.
    Compressing writes "<name>.gz"; decompressing requires a name ending in
    ".gz" and writes the name without that suffix.  Input files are kept.
    """
    from argparse import ArgumentParser
    parser = ArgumentParser(description=
        "A simple command line interface for the gzip module: act like gzip, "
        "but do not delete the input file.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fast', action='store_true', help='compress faster')
    group.add_argument('--best', action='store_true', help='compress better')
    group.add_argument("-d", "--decompress", action="store_true",
                       help="act like gunzip instead of gzip")

    parser.add_argument("args", nargs="*", default=["-"], metavar='file')
    args = parser.parse_args()

    compresslevel = _COMPRESS_LEVEL_TRADEOFF
    if args.fast:
        compresslevel = _COMPRESS_LEVEL_FAST
    elif args.best:
        compresslevel = _COMPRESS_LEVEL_BEST

    for arg in args.args:
        # f is the source and g the destination; "open" below is this
        # module's gzip-aware open(), builtins.open is the plain one.
        if args.decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer)
                g = sys.stdout.buffer
            else:
                if arg[-3:] != ".gz":
                    sys.exit(f"filename doesn't end in .gz: {arg!r}")
                f = open(arg, "rb")
                g = builtins.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin.buffer
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer,
                             compresslevel=compresslevel)
            else:
                f = builtins.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        while True:
            chunk = f.read(io.DEFAULT_BUFFER_SIZE)
            if not chunk:
                break
            g.write(chunk)
        # Never close the process-wide standard streams.
        if g is not sys.stdout.buffer:
            g.close()
        if f is not sys.stdin.buffer:
            f.close()

if __name__ == '__main__':
    main()