blob: 26f435456b6e0897da9d8c5827ffe14d27f061ba [file] [log] [blame]
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""
5
# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
Lars Gustäbel8c06ccc2009-10-29 09:15:00 +00008import struct, sys, time, os
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Antoine Pitrou673ddf92010-01-03 22:29:56 +000010import io
Guido van Rossum68de3791997-07-19 20:22:23 +000011import __builtin__
Guido van Rossum15262191997-04-30 16:04:57 +000012
Skip Montanaro2dd42762001-01-23 15:35:05 +000013__all__ = ["GzipFile","open"]
14
# Bit masks for the flag byte of a gzip member header.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ = 1
WRITE = 2
18
def write32u(output, value):
    """Write value to output as a little-endian unsigned 32-bit integer.

    output only needs a write() method that accepts a byte string.
    """
    packed = struct.pack("<L", value)
    output.write(packed)
23
def read32(input):
    """Read four bytes from input and return them decoded as a
    little-endian unsigned 32-bit integer.

    input only needs a read() method returning a byte string.
    """
    raw = input.read(4)
    (value,) = struct.unpack("<I", raw)
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000026
def open(filename, mode="rb", compresslevel=9):
    """Open a gzip file; shorthand for GzipFile(filename, mode, compresslevel).

    filename must be supplied.  mode defaults to 'rb' and compresslevel
    defaults to 9.

    """
    gzip_file = GzipFile(filename, mode, compresslevel)
    return gzip_file
35
class GzipFile(io.BufferedIOBase):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    An instance works in exactly one direction, READ or WRITE, selected by
    the mode argument of the constructor.

    """

    # File object opened by the constructor itself; stays None when the
    # caller supplied fileobj.  close() closes only this object.
    myfileobj = None
    # Cap for the doubling chunk size that read() passes to _read().
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.  A 'U' in mode is ignored.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the stream when compressing.  All gzip compressed streams
        are required to contain a timestamp.  If omitted or None, the
        current time is used.  This module ignores the timestamp when
        decompressing; however, some programs, such as gunzip, make use
        of it.  The format of the timestamp is the same as that of the
        return value of time.time() and of the st_mtime member of the
        object returned by os.stat().

        Raises IOError if mode does not start with 'r', 'w' or 'a'.

        """

        # Never enable universal newlines on the underlying file: newline
        # translation would corrupt the compressed byte stream.
        if mode:
            mode = mode.replace('U', '')
        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            # '<fdopen>' is a placeholder name, not a real filename; keep it
            # out of the gzip header.
            if hasattr(fileobj, 'name') and fileobj.name != '<fdopen>':
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Buffer data read from gzip file. extrastart is offset in
            # stream where buffer starts. extrasize is number of
            # bytes remaining in buffer from current stream position.
            self.extrabuf = b""
            self.extrasize = 0
            self.extrastart = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate stream, because the gzip header
            # and trailer are written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Position in the uncompressed stream.
        self.offset = 0
        self.mtime = mtime

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias for the name attribute.

        In write mode the value gets a '.gz' suffix when name lacks one.
        Accessing it emits a DeprecationWarning.
        """
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        # Strip the first and last character of the wrapped object's repr
        # (the angle brackets of a typical file repr) and embed the rest.
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the per-member state used while compressing."""
        self.name = filename
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1.  Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(struct.pack("<B", flags))
        mtime = self.mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        self.fileobj.write(b'\002')                 # extra flags byte
        self.fileobj.write(b'\377')                 # OS byte
        if fname:
            self.fileobj.write(fname + b'\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32(b"") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Read and validate a member header, skipping its optional fields.

        Stores the header timestamp in self.mtime.  Raises IOError when the
        magic number or the compression method is not the expected one.
        """
        magic = self.fileobj.read(2)
        if magic != b'\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        self.mtime = read32(self.fileobj)
        # Skip the extra-flags byte and the OS byte.
        self.fileobj.read(2)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == b'\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self,data):
        """Compress data and write it to the underlying file object.

        Returns the number of uncompressed bytes consumed.  Raises IOError
        on an object opened for reading and ValueError on a closed object.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        # Convert data type if called by io.BufferedWriter.
        if isinstance(data, memoryview):
            data = data.tobytes()

        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.fileobj.write(self.compress.compress(data))
            self.offset += len(data)

        return len(data)

    def read(self, size=-1):
        """Return up to size uncompressed bytes; a negative size reads
        everything up to the end of the stream.

        Raises IOError on an object opened for writing.
        """
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return b''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        offset = self.offset - self.extrastart
        chunk = self.extrabuf[offset: offset + size]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back by moving the stream position backwards.

        The bytes themselves are still held in extrabuf, so only the
        counters need adjusting.
        """
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read size compressed bytes and append the decompressed result
        to the internal buffer.

        Raises EOFError once no further data is available.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == b"":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != b"":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer and update CRC and size."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        # Drop the part of the buffer that was already consumed, so that
        # the buffer starts at the current stream position again.
        offset = self.offset - self.extrastart
        self.extrabuf = self.extrabuf[offset:] + data
        self.extrasize = self.extrasize + len(data)
        self.extrastart = self.offset
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and size stored in the member trailer.

        Raises IOError when either value disagrees with what was computed
        from the decompressed data.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)   # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

        # A member may be followed by zero padding.  Consume all zero
        # bytes and leave the file position on the first non-zero byte, so
        # that padding is not mistaken for the header of another member.
        c = b"\x00"
        while c == b"\x00":
            c = self.fileobj.read(1)
        if c:
            self.fileobj.seek(-1, 1)

    @property
    def closed(self):
        """True once close() has detached the underlying file object."""
        return self.fileobj is None

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file object opened by the constructor
        is closed; one supplied by the caller is left open.  Calling
        close() again is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Detach first so the object counts as closed even when writing
        # the trailer fails.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            # Always release the file we opened ourselves, error or not.
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data (write mode only) and then flush
        the underlying file object.

        Raises ValueError on a closed object.
        """
        if self.fileobj is None:
            raise ValueError("I/O operation on closed file.")
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = b""
        self.extrasize = 0
        self.extrastart = 0
        self.offset = 0

    def readable(self):
        """Return True when the object was opened for reading."""
        return self.mode == READ

    def writable(self):
        """Return True when the object was opened for writing."""
        return self.mode == WRITE

    def seekable(self):
        """Always True; see seek() for how seeking is emulated."""
        return True

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream and return it.

        whence may be 0 (absolute) or 1 (relative to the current position);
        any other value raises ValueError.  In write mode only forward
        seeks are possible (IOError otherwise) and the gap is filled with
        zero bytes.  In read mode a backward seek rewinds and re-reads
        from the start of the file.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * b'\0')
            self.write((count % 1024) * b'\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        return self.offset

    def readline(self, size=-1):
        """Return one line including its trailing newline, or at most size
        bytes when size is non-negative."""
        if size < 0:
            # Shortcut common case - newline found in buffer.
            offset = self.offset - self.extrastart
            i = self.extrabuf.find(b'\n', offset) + 1
            if i > 0:
                self.extrasize -= i - offset
                self.offset += i - offset
                return self.extrabuf[offset: i]

            # Effectively "no limit".
            size = sys.maxsize
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find(b'\n')

            # Cut the chunk at the size limit (i = size - 1) under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == b'':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        # Remember a larger starting read size for the next call.
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return b''.join(bufs)     # Return resulting line
Tim Peters07e99cb2001-01-14 23:47:14 +0000449
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000450
def _test():
    """Minimal command-line driver: compress the named files, or
    decompress them when the first argument is -d.

    "-" (the default when no file is named) stands for stdin/stdout.
    Input files are never deleted and no other gzip option is supported.
    """
    argv = sys.argv[1:]
    decompress = argv and argv[0] == "-d"
    if decompress:
        argv = argv[1:]
    if not argv:
        argv = ["-"]
    for arg in argv:
        use_std_streams = (arg == "-")
        if decompress and use_std_streams:
            source = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
            sink = sys.stdout
        elif decompress:
            if arg[-3:] != ".gz":
                # Single parenthesised argument: prints the same text
                # whether print is a statement or a function.
                print("filename doesn't end in .gz: " + repr(arg))
                continue
            source = open(arg, "rb")
            sink = __builtin__.open(arg[:-3], "wb")
        elif use_std_streams:
            source = sys.stdin
            sink = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
        else:
            source = __builtin__.open(arg, "rb")
            sink = open(arg + ".gz", "wb")
        # Copy in 1 KiB chunks until the source is exhausted.
        chunk = source.read(1024)
        while chunk:
            sink.write(chunk)
            chunk = source.read(1024)
        # The standard streams are left open for the interpreter.
        if sink is not sys.stdout:
            sink.close()
        if source is not sys.stdin:
            source.close()


if __name__ == '__main__':
    _test()