blob: 8fdac8397d4d7e9e385dfd445abc28513619dab8 [file] [log] [blame]
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

Lars Gustäbel8c06ccc2009-10-29 09:15:00 +00008import struct, sys, time, os
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Antoine Pitrou673ddf92010-01-03 22:29:56 +000010import io
Guido van Rossum68de3791997-07-19 20:22:23 +000011import __builtin__
Guido van Rossum15262191997-04-30 16:04:57 +000012
Skip Montanaro2dd42762001-01-23 15:35:05 +000013__all__ = ["GzipFile","open"]
14
# Bit masks tested against the flag byte of a gzip member header
# (see GzipFile._read_gzip_header / _write_gzip_header).
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ, WRITE = 1, 2
18
def write32u(output, value):
    """Write *value* to *output* as a 4-byte little-endian unsigned integer.

    *output* is any object with a write() method.  *value* must already be
    in the range 0 .. 2**32-1; struct.pack() raises struct.error otherwise,
    so callers mask with 0xffffffff where the value may be larger.
    """
    output.write(struct.pack("<I", value))
23
def read32(input):
    """Read 4 bytes from *input* and return them as an unsigned integer.

    The bytes are interpreted as little-endian.  If *input* yields fewer
    than 4 bytes (e.g. a truncated file), struct.unpack() raises
    struct.error.
    """
    (value,) = struct.unpack("<I", input.read(4))
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000026
def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.

    """
    return GzipFile(filename, mode, compresslevel)
35
class GzipFile(io.BufferedIOBase):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened by the constructor itself; close() closes it.
    # Stays None when the caller supplied fileobj.
    myfileobj = None
    # Upper bound for the chunk size read() requests from the raw file.
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the stream when compressing.  All gzip compressed streams
        are required to contain a timestamp.  If omitted or None, the
        current time is used.  This module ignores the timestamp when
        decompressing; however, some programs, such as gunzip, make use
        of it.  The format of the timestamp is the same as that of the
        return value of time.time() and of the st_mtime member of the
        object returned by os.stat().

        Raises IOError if mode does not start with 'r', 'w' or 'a'.

        """

        # Never pass universal-newline mode on to the underlying file:
        # newline translation would corrupt the binary gzip data.
        if mode:
            mode = mode.replace('U', '')
        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            # Issue #13781: os.fdopen() creates a fileobj with a bogus name
            # attribute. Avoid saving this in the gzip header's filename field.
            # A non-string name (e.g. an integer file descriptor) is ignored
            # as well, since the header code treats the name as a path.
            filename = getattr(fileobj, 'name', '')
            if not isinstance(filename, basestring) or filename == '<fdopen>':
                filename = ''
        if mode is None:
            mode = getattr(fileobj, 'mode', 'rb')

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Buffer data read from gzip file. extrastart is offset in
            # stream where buffer starts. extrasize is number of
            # bytes remaining in buffer from current stream position.
            self.extrabuf = ""
            self.extrasize = 0
            self.extrastart = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate stream, the gzip header and
            # trailer are written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Current position in the uncompressed stream.
        self.offset = 0
        self.mtime = mtime

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias of the name attribute.

        In write mode a ".gz" suffix is appended when the name lacks one.
        Accessing it emits a DeprecationWarning.
        """
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _check_closed(self):
        """Raises a ValueError if the underlying file object has been closed.

        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

    def _init_write(self, filename):
        """Reset the per-stream state used while compressing."""
        self.name = filename
        self.crc = zlib.crc32("") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object.

        The stored file name is the base name of self.name with a trailing
        ".gz" removed; the FNAME field is omitted when that is empty.
        """
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        fname = os.path.basename(self.name)
        if fname.endswith(".gz"):
            fname = fname[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        mtime = self.mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        # Two fixed bytes that _read_gzip_header() skips on input
        # (the extra-flags and OS fields of the header).
        self.fileobj.write('\002')
        self.fileobj.write('\377')
        if fname:
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32("") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Read and validate the header of the next gzip member.

        Stores the header timestamp in self.mtime and skips every optional
        field.  Raises IOError on a bad magic number or an unknown
        compression method.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        self.mtime = read32(self.fileobj)
        # extraflag = self.fileobj.read(1)
        # os = self.fileobj.read(1)
        self.fileobj.read(2)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress data and write it to the underlying file object.

        Returns the number of uncompressed bytes consumed.  Raises
        ValueError if the file is closed and IOError (EBADF) if it was
        not opened for writing.
        """
        self._check_closed()
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        # Convert data type if called by io.BufferedWriter.
        if isinstance(data, memoryview):
            data = data.tobytes()

        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.fileobj.write(self.compress.compress(data))
            self.offset += len(data)

        return len(data)

    def read(self, size=-1):
        """Return up to size bytes of uncompressed data.

        A negative size reads until end of file.  Fewer bytes are returned
        when the end of the data is reached first.  Raises ValueError if
        the file is closed and IOError (EBADF) if it was not opened for
        reading.
        """
        self._check_closed()
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        # _read() signals end of data by raising EOFError; the request
        # size doubles each round up to max_read_chunk.
        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        offset = self.offset - self.extrastart
        chunk = self.extrabuf[offset: offset + size]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back so the next read returns it again.

        Only the counters move; the bytes are expected to still be present
        in self.extrabuf.
        """
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read up to size compressed bytes and buffer their decompression.

        Raises EOFError once no more data is available.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data) + 8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer and update CRC and size."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        # Drop the part of the buffer that was already handed out.
        offset = self.offset - self.extrastart
        self.extrabuf = self.extrabuf[offset:] + data
        self.extrasize = self.extrasize + len(data)
        self.extrastart = self.offset
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the member trailer and skip any zero padding after it.

        Raises IOError if the stored CRC or length does not match the
        decompressed data.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)  # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

        # Gzip files can be padded with zeroes and still have archives.
        # Consume all zero bytes and set the file position to the first
        # non-zero byte. See http://www.gzip.org/#faq8
        c = "\x00"
        while c == "\x00":
            c = self.fileobj.read(1)
        if c:
            self.fileobj.seek(-1, 1)

    @property
    def closed(self):
        """True once close() has dropped the underlying file object."""
        return self.fileobj is None

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the remaining compressed data and the trailer (CRC
        and size) are written first.  A file opened by the constructor is
        closed; a caller-supplied fileobj is left open.  Calling close()
        again is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Mark the object closed up front and release the self-opened file
        # in 'finally', so a failing trailer write cannot leak the handle.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data to the underlying file object.

        Does nothing in read mode.  Raises ValueError if the file is closed.
        """
        self._check_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.extrastart = 0
        self.offset = 0

    def readable(self):
        return self.mode == READ

    def writable(self):
        return self.mode == WRITE

    def seekable(self):
        return True

    def seek(self, offset, whence=0):
        """Move to the given position in the uncompressed stream.

        whence may be 0 (absolute) or 1 (relative to the current position);
        any other value raises ValueError.  In write mode only forward
        seeks are possible (IOError otherwise) and the gap is filled with
        zero bytes.  In read mode a backward seek rewinds and re-reads from
        the start.  Returns the new position.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            zeros = 1024 * '\0'
            for i in range(count // 1024):
                self.write(zeros)
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        return self.offset

    def readline(self, size=-1):
        """Read and return one line, including its trailing newline.

        With a non-negative size at most size bytes are returned.
        """
        if size < 0:
            # Shortcut common case - newline found in buffer.
            offset = self.offset - self.extrastart
            i = self.extrabuf.find('\n', offset) + 1
            if i > 0:
                self.extrasize -= i - offset
                self.offset += i - offset
                return self.extrabuf[offset: i]

            size = sys.maxint
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find('\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == '':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return ''.join(bufs)  # Return resulting line
Tim Peters07e99cb2001-01-14 23:47:14 +0000472
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000473
def _test():
    """Act like gzip; with -d, act like gunzip.

    The input file is not deleted, however, nor are any other gzip
    options or features supported.  The argument "-" (also the default
    when no file is named) means standard input and standard output.
    """
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                g = sys.stdout
            else:
                if arg[-3:] != ".gz":
                    sys.stdout.write("filename doesn't end in .gz: "
                                     + repr(arg) + "\n")
                    continue
                # open() here is this module's gzip-aware open().
                f = open(arg, "rb")
                g = __builtin__.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
            else:
                f = __builtin__.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        while True:
            chunk = f.read(1024)
            if not chunk:
                break
            g.write(chunk)
        # Never close the process-wide standard streams themselves; a
        # GzipFile wrapping them is closed so its trailer gets written.
        if g is not sys.stdout:
            g.close()
        if f is not sys.stdin:
            f.close()


if __name__ == '__main__':
    _test()