blob: 4ecd2115ffe7b847c785cded9b1d6340196b39d9 [file] [log] [blame]
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00001"""Functions that read and write gzipped files.
2
Guido van Rossum54f22ed2000-02-04 15:10:34 +00003The user of the file doesn't have to worry about the compression,
4but random access is not allowed."""
5
6# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
Tim Peters49667c22004-07-27 21:05:21 +00008import struct, sys, time
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Guido van Rossum68de3791997-07-19 20:22:23 +000010import __builtin__
Guido van Rossum15262191997-04-30 16:04:57 +000011
# Names exported by a star-import of this module.
__all__ = ["GzipFile","open"]

# Bit masks tested against the flag byte of a gzip member header (see
# GzipFile._read_gzip_header): each set bit announces an optional header
# field.  FNAME is also the only flag this module ever sets when writing.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode to record whether the object was opened
# for reading or for writing.
READ, WRITE = 1, 2
17
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative i is treated as the signed (two's complement) reading of a
    32-bit pattern and is shifted up by 2**32; a non-negative i is returned
    unchanged.  The result may therefore be >= 2GB.
    """
    if i < 0:
        # Plain int literal: integers promote to arbitrary precision on
        # their own, so no long-literal suffix is required.
        i += 1 << 32
    return i
26
def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int.

    Works for negative values and for values wider than 32 bits alike.
    """
    # Plain hex literal: no long-literal suffix is needed for the mask.
    return i & 0xFFFFFFFF
30
def write32(output, value):
    """Write value to output as a 4-byte little-endian integer.

    output is any object with a write() method.  value may be given either
    as a signed or as an unsigned 32-bit quantity; only its low-order
    32 bits are written, so -1 and 0xFFFFFFFF produce the same bytes.
    """
    # Packing the masked value as unsigned yields exactly the bytes the
    # signed "<l" format produces for in-range input, while also accepting
    # values >= 2**31 (e.g. an unsigned CRC) that "<l" cannot represent.
    output.write(struct.pack("<L", value & 0xFFFFFFFF))
Tim Peters07e99cb2001-01-14 23:47:14 +000033
def write32u(output, value):
    """Write value to output as a 4-byte little-endian unsigned integer.

    output is any object with a write() method.  The bit pattern is written
    correctly whether value is expressed as signed or unsigned; only the
    low-order 32 bits are used.
    """
    # Mask first: the L format itself refuses negative numbers on struct
    # implementations that range-check, so normalise to 0 .. 2**32-1.
    output.write(struct.pack("<L", value & 0xFFFFFFFF))
38
def read32(input):
    """Read 4 bytes from input and decode them as a signed little-endian int.

    input is any object with a read() method.  The result lies in the
    range -2**31 .. 2**31-1; use U32() to reinterpret it as unsigned.
    """
    raw = input.read(4)
    (value,) = struct.unpack("<l", raw)
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000041
def open(filename, mode="rb", compresslevel=9):
    """Open a gzip-compressed file and return a GzipFile for it.

    Convenience wrapper equivalent to GzipFile(filename, mode,
    compresslevel).  filename must be supplied; mode falls back to 'rb'
    and compresslevel to 9 when omitted.

    """
    gzip_file = GzipFile(filename=filename,
                         mode=mode,
                         compresslevel=compresslevel)
    return gzip_file
50
class GzipFile:
    """File-like object that reads or writes a gzip-compressed stream.

    The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    An instance is either a reader or a writer (see the module-level READ
    and WRITE constants), never both.  Reading requires the underlying file
    object to support tell() and seek(), because member boundaries and the
    CRC/size trailer are located by seeking.

    """

    # File object opened by the constructor itself (and therefore closed by
    # close()); stays None when the caller supplied fileobj.
    myfileobj = None

    # Largest number of compressed bytes requested from the underlying file
    # in one go.  read() doubles its request size as it goes; without a
    # ceiling a big file would lead to enormous single read() calls.
    max_read_chunk = 10 * 1024 * 1024   # 10 MiB

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        Raises IOError if mode does not start with 'r', 'w' or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Decompressed data not yet handed to the caller, and its length
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative window bits: raw deflate stream, no zlib header,
            # because this class writes the gzip header/trailer itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Current position in the *uncompressed* stream, reported by tell()
        self.offset = 0

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        """Return '<gzip ... 0xADDR>' built around the repr of the file."""
        s = repr(self.fileobj)
        # s[1:-1] drops the surrounding angle brackets of the inner repr
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the writer state: stored filename, running CRC and size."""
        # The stored name always carries a '.gz' suffix; the header writer
        # strips the last three characters again to get the original name.
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file."""
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        fname = self.filename[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(time.time()))  # modification time
        self.fileobj.write('\002')                 # extra-flags byte
        self.fileobj.write('\377')                 # OS byte
        if fname:
            # original filename, null-terminated
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_gzip_header(self):
        """Consume one gzip member header from the underlying file.

        Raises IOError if the magic number is wrong or the compression
        method is not 8.  All optional fields are read and discarded.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord( self.fileobj.read(1) )
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord( self.fileobj.read(1) )
        # Skip, in one read, the three fields that are not used here:
        # modtime (4 bytes), extraflag (1 byte) and os (1 byte).
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            # (its length is a 16-bit little-endian count)
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self,data):
        """Compress data and write it to the underlying file.

        Raises IOError (EBADF) on an object opened for reading and
        ValueError on a closed object.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write( self.compress.compress(data) )
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size bytes of uncompressed data.

        A negative size (the default) reads until end of file.  Fewer than
        size bytes are returned only when the end of the data is reached;
        an empty string signals EOF.  Raises IOError (EBADF) on an object
        opened for writing.
        """
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    # grow the request geometrically, but never past the cap
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back to the front of the buffer of pending data."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read up to size compressed bytes and buffer what they decompress to.

        Handles member boundaries: at the start of a member the header is
        parsed and a fresh decompressor created; at its end the trailer is
        verified.  Raises EOFError when there is nothing more to read.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek( pos ) # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data( uncompress )
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data( uncompress )

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer, updating CRC and size."""
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and length stored in the member trailer.

        Raises IOError if either disagrees with what was decompressed.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data match the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = U32(read32(self.fileobj))   # may exceed 2GB
        if U32(crc32) != U32(self.crc):
            raise IOError("CRC check failed")
        elif isize != LOWU32(self.size):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file.

        In write mode the remaining compressed data and the CRC/size trailer
        are written first.  A file object supplied by the caller is left
        open; one opened by the constructor is closed.  Calling close() on
        an already closed object does nothing.
        """
        # Detach first so that a second close() finds nothing to do
        fileobj, self.fileobj = self.fileobj, None
        try:
            if fileobj is not None and self.mode == WRITE:
                fileobj.write(self.compress.flush())
                # The CRC may come back from zlib as a negative (signed) or
                # a large positive (unsigned) number; LOWU32 maps both onto
                # the same 32-bit pattern.
                write32u(fileobj, LOWU32(self.crc))
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, LOWU32(self.size))
        finally:
            # Close the file we opened ourselves even if the trailer
            # could not be written.
            myfileobj, self.myfileobj = self.myfileobj, None
            if myfileobj:
                myfileobj.close()

    def __del__(self):
        """Close the object on garbage collection if still open."""
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # __init__ failed before the attributes were set
            return
        self.close()

    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending data to the underlying file.

        In write mode the compressor is flushed with zlib_mode first; the
        underlying file's flush() is called in either mode.
        """
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        """Return False: a GzipFile is never an interactive terminal."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream.

        whence is 0 (the default) for an absolute offset or 1 for an offset
        relative to the current position; seeking from the end is not
        supported and raises ValueError.

        In write mode only forward seeks are possible (the gap is filled
        with zero bytes); a backward seek raises IOError.  In read mode a
        backward seek rewinds and re-reads from the start, so it can be slow.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

    def readline(self, size=-1):
        """Read one line, including its trailing newline if present.

        At most size bytes are returned when size is non-negative.  An
        empty string signals EOF.
        """
        if size < 0: size = sys.maxint
        bufs = []
        readsize = min(100, size)    # Read from the file in small chunks
        while True:
            if size == 0:
                return "".join(bufs) # Return resulting line

            c = self.read(readsize)
            i = c.find('\n')
            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if i==-1 and len(c) > size: i=size-1
            elif size <= i: i = size -1

            if i >= 0 or c == '':
                bufs.append(c[:i+1])    # Add portion of last chunk
                self._unread(c[i+1:])   # Push back rest of chunk
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return a list of lines read from the file.

        Reading stops at EOF, or once the total length of the lines read
        reaches a positive sizehint.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every string in the iterable L, in order."""
        for line in L:
            self.write(line)

    def __iter__(self):
        """Return self; a GzipFile iterates over its own lines."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at EOF."""
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration
437
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000438
def _test():
    """Minimal command-line driver: compress or decompress the given files.

    Act like gzip; with -d, act like gunzip.
    The input file is not deleted, however, nor are any other gzip
    options or features supported.  The argument "-" (also the default when
    no file is named) means standard input to standard output.
    """
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                g = sys.stdout
            else:
                if arg[-3:] != ".gz":
                    # write() rather than print so the line parses under
                    # both the print statement and print function grammars
                    message = "filename doesn't end in .gz:"
                    sys.stdout.write(message + " " + repr(arg) + "\n")
                    continue
                f = open(arg, "rb")
                g = __builtin__.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
            else:
                f = __builtin__.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        # Close both ends even when the copy fails part-way (for example
        # on a corrupt archive); the standard streams are never closed.
        try:
            try:
                while True:
                    chunk = f.read(1024)
                    if not chunk:
                        break
                    g.write(chunk)
            finally:
                if g is not sys.stdout:
                    g.close()
        finally:
            if f is not sys.stdin:
                f.close()
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000476
# Run the gzip/gunzip-like command-line driver when executed as a script.
if __name__ == '__main__':
    _test()