blob: 3c1ebf25142c925a8d76264fde3c196bfd522652 [file] [log] [blame]
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00001"""Functions that read and write gzipped files.
2
Guido van Rossum54f22ed2000-02-04 15:10:34 +00003The user of the file doesn't have to worry about the compression,
4but random access is not allowed."""
5
6# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
Tim Peters49667c22004-07-27 21:05:21 +00008import struct, sys, time
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Guido van Rossum68de3791997-07-19 20:22:23 +000010import __builtin__
Guido van Rossum15262191997-04-30 16:04:57 +000011
Skip Montanaro2dd42762001-01-23 15:35:05 +000012__all__ = ["GzipFile","open"]
13
# Bit masks for the flags byte of a gzip member header.  This module
# writes FNAME and tests FEXTRA, FNAME, FCOMMENT and FHCRC when parsing.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record whether an instance is a
# reader or a writer.
READ = 1
WRITE = 2
17
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative i is treated as the two's-complement encoding of a 32-bit
    value and is shifted into the range 2**31 .. 2**32 - 1; a
    non-negative i is returned unchanged.
    """
    if i < 0:
        # Plain int literal instead of the Python-2-only "1L" spelling;
        # the shift yields the same value.
        i += 1 << 32
    return i
26
def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int."""
    # Mask written without the Python-2-only "L" suffix; same value.
    return i & 0xFFFFFFFF
30
def write32(output, value):
    """Write value to output as a signed 32-bit little-endian integer.

    output only needs a write() method accepting the packed 4 bytes.
    """
    packed = struct.pack("<l", value)
    output.write(packed)
Tim Peters07e99cb2001-01-14 23:47:14 +000033
def write32u(output, value):
    """Write value to output as an unsigned 32-bit little-endian integer.

    output only needs a write() method accepting the packed 4 bytes.
    """
    # Original note: the L format writes the bit pattern correctly
    # whether signed or unsigned.
    # NOTE(review): that describes the struct module this file was
    # written against -- confirm before passing negative values on
    # newer interpreters.
    packed = struct.pack("<L", value)
    output.write(packed)
38
def read32(input):
    """Read 4 bytes from input and decode them as a signed 32-bit
    little-endian integer.

    input only needs a read(n) method.
    """
    (value,) = struct.unpack("<l", input.read(4))
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000041
def open(filename, mode="rb", compresslevel=9):
    """Open a gzip file; shorthand for GzipFile(filename, mode, compresslevel).

    filename is required.  mode defaults to 'rb' and compresslevel
    defaults to 9.  The new GzipFile instance is returned.

    """
    return GzipFile(filename=filename, mode=mode,
                    compresslevel=compresslevel)
50
class GzipFile:
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    An instance is either a reader or a writer; this is decided once by
    the mode given to the constructor.  Positioning is only emulated
    (see seek() and rewind()).

    """

    # File object opened by the constructor itself (rather than supplied
    # by the caller); close() closes it.
    myfileobj = None
    # Upper bound on the chunk size that read() requests from _read().
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        Raises IOError if mode does not start with 'r', 'w' or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Decompressed data not yet handed to the caller, and its length
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate stream, no zlib header/trailer;
            # the gzip header and trailer are written by this class.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Position in the uncompressed stream, as reported by tell()
        self.offset = 0

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        """Return '<gzip ...>' built from the repr of the underlying file
        object (minus its first and last character) and this object's id."""
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the writer state: stored filename, running CRC and size."""
        # The stored name always ends in '.gz'; _write_gzip_header()
        # strips those three characters again to get the header name.
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        fname = self.filename[:-3]                 # drop the '.gz' suffix
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, int(time.time()))   # modification time
        self.fileobj.write('\002')                 # extra-flags byte
        self.fileobj.write('\377')                 # OS byte
        if fname:
            self.fileobj.write(fname + '\000')     # null-terminated name

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_gzip_header(self):
        """Read and validate a gzip member header, discarding optional fields.

        Raises IOError if the magic number or compression method is wrong.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord( self.fileobj.read(1) )
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord( self.fileobj.read(1) )
        # Skip modtime (4 bytes), extra flags (1 byte) and OS (1 byte).
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self,data):
        """Compress data and write it to the underlying file object.

        Raises IOError (EBADF) on a read-mode object and ValueError on a
        closed one.  Empty data is ignored.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write( self.compress.compress(data) )
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size bytes of decompressed data.

        A negative size reads everything up to end of file.  Fewer bytes
        than requested are returned once the data runs out.  Raises
        IOError (EBADF) on a write-mode object.
        """
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        # Start small and double the request each round, capped at
        # max_read_chunk.
        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back onto the front of the read buffer."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read size compressed bytes and append the decompressed result
        to the read buffer.

        Raises EOFError when there is nothing more to read.  Uses tell()
        and seek() on the underlying file object.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek( pos ) # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            # NOTE(review): the trailer is checked before the flushed
            # data is added to the CRC/size; order kept as in the
            # original -- confirm flush() cannot return data here.
            self._read_eof()
            self._add_read_data( uncompress )
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data( uncompress )

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the read buffer and update the
        running CRC and size."""
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Check the member trailer against the computed CRC and size.

        Raises IOError on a mismatch.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data match the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = U32(read32(self.fileobj))   # may exceed 2GB
        if U32(crc32) != U32(self.crc):
            raise IOError("CRC check failed")
        elif isize != LOWU32(self.size):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the gzip stream (write mode) and release the file objects.

        In write mode the remaining compressed data and the CRC/size
        trailer are written.  A file object supplied by the caller is
        left open; one opened by the constructor is closed, even if
        writing the trailer fails.  Calling close() again is a no-op.
        """
        # Detach first so a repeated close() finds nothing left to do.
        fileobj, self.fileobj = self.fileobj, None
        try:
            if fileobj is not None and self.mode == WRITE:
                fileobj.write(self.compress.flush())
                # Write the CRC as an unsigned bit pattern so both signed
                # and unsigned zlib.crc32() results are accepted.
                write32u(fileobj, LOWU32(self.crc))
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, LOWU32(self.size))
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def __del__(self):
        """Close the object on deletion unless it is already closed."""
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # fileobj was never assigned (constructor did not finish).
            return
        self.close()

    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush the compressor (write mode only) and then the underlying
        file object."""
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        """Always return False."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to the given position in the uncompressed stream.

        whence 0 (the default) takes offset as an absolute position and
        whence 1 as relative to the current position; any other whence
        raises ValueError.

        In write mode only forward seeks are possible (IOError
        otherwise) and the gap is filled with zero bytes.  In read mode
        a backward seek rewinds and re-reads from the start.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

    def readline(self, size=-1):
        """Return the next line, including its newline if one was found.

        At most size bytes are returned when size is non-negative.  An
        empty string means end of file.
        """
        if size < 0: size = sys.maxint
        bufs = []
        readsize = min(100, size)    # Read from the file in small chunks
        while True:
            if size == 0:
                return "".join(bufs) # Return resulting line

            c = self.read(readsize)
            i = c.find('\n')
            # We set i=size-1 to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if i==-1 and len(c) > size: i=size-1
            elif size <= i: i = size -1

            if i >= 0 or c == '':
                bufs.append(c[:i+1])    # Add portion of last chunk
                self._unread(c[i+1:])   # Push back rest of chunk
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return a list of lines, stopping at end of file or once about
        sizehint bytes have been collected.

        A sizehint of zero or less reads all remaining lines.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every item of L with write()."""
        for line in L:
            self.write(line)

    def __iter__(self):
        """Return self; lines are produced by next()."""
        return self

    def next(self):
        """Return the next line, or raise StopIteration at end of file."""
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration
438
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000439
def _test():
    """Command-line driver: act like gzip; with -d, act like gunzip.

    Arguments are taken from sys.argv.  Each argument is a file name,
    or "-" for stdin/stdout; with no arguments "-" is assumed.  The
    input file is not deleted, nor are any other gzip options or
    features supported.
    """
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                g = sys.stdout
            else:
                if arg[-3:] != ".gz":
                    # Same text the print statement produced, written
                    # without Python-2-only syntax.
                    sys.stdout.write("filename doesn't end in .gz: %r\n"
                                     % (arg,))
                    continue
                f = open(arg, "rb")
                g = __builtin__.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
            else:
                f = __builtin__.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        # Close both ends even if the copy fails part-way; the standard
        # streams themselves are never closed.
        try:
            while True:
                chunk = f.read(1024)
                if not chunk:
                    break
                g.write(chunk)
        finally:
            try:
                if g is not sys.stdout:
                    g.close()
            finally:
                if f is not sys.stdin:
                    f.close()
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000477
478if __name__ == '__main__':
479 _test()