blob: 8802adb0d9ac57ad796869e2c0f77521a9b3b2dc [file] [log] [blame]
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00001"""Functions that read and write gzipped files.
2
Guido van Rossum54f22ed2000-02-04 15:10:34 +00003The user of the file doesn't have to worry about the compression,
4but random access is not allowed."""
5
6# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
Eric S. Raymondee5e61d2001-02-09 09:10:35 +00008import struct, sys, time
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Guido van Rossum68de3791997-07-19 20:22:23 +000010import __builtin__
Guido van Rossum15262191997-04-30 16:04:57 +000011
# Public names exported by a star-import of this module.
__all__ = ["GzipFile", "open"]
13
# Bit masks tested against the flag byte of a gzip member header.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ = 1
WRITE = 2
17
Tim Petersfb0ea522002-11-04 19:50:11 +000018def U32(i):
19 """Return i as an unsigned integer, assuming it fits in 32 bits.
20
21 If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
22 """
23 if i < 0:
24 i += 1L << 32
25 return i
26
def write32(output, value):
    """Write value to output as a 4-byte little-endian signed integer."""
    packed = struct.pack("<l", value)
    output.write(packed)
Tim Peters07e99cb2001-01-14 23:47:14 +000029
def write32u(output, value):
    """Write value to output as a 4-byte little-endian unsigned integer."""
    # The "L" format is chosen so that the same bit pattern is emitted
    # whether the caller holds the value as signed or unsigned.
    packed = struct.pack("<L", value)
    output.write(packed)
34
def read32(input):
    """Read 4 bytes from input and decode them as a little-endian signed integer."""
    raw = input.read(4)
    (value,) = struct.unpack("<l", raw)
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000037
def open(filename, mode="rb", compresslevel=9):
    """Open a gzip file; shorthand for GzipFile(filename, mode, compresslevel).

    filename must be supplied.  mode falls back to 'rb' and
    compresslevel falls back to 9 when they are omitted.

    """
    return GzipFile(filename=filename, mode=mode,
                    compresslevel=compresslevel)
46
class GzipFile:
    """File-like object that reads or writes gzip-compressed data.

    The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened by the constructor itself (as opposed to one passed
    # in by the caller).  close() closes it; it stays None when the caller
    # supplied fileobj.
    myfileobj = None

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        IOError is raised when mode starts with anything other than
        'r', 'w' or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Decompressed data not yet handed to the caller, and its length
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: produce a raw deflate stream; the gzip header
            # and trailer are written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Position in the uncompressed stream, as reported by tell()
        self.offset = 0

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        """Return '<gzip ...>' built from the underlying file's repr and id(self)."""
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the checksum, size and buffer state used while writing.

        self.filename always ends in '.gz' afterwards (the suffix is
        appended when missing); _write_gzip_header() strips the last three
        characters again to obtain the name stored in the header.
        """
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        fname = self.filename[:-3]                 # drop the '.gz' suffix
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(time.time()))  # modification time
        self.fileobj.write('\002')                 # extra-flags byte
        self.fileobj.write('\377')                 # OS byte
        if fname:
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_gzip_header(self):
        """Consume one gzip member header from the underlying file object.

        Raises IOError when the magic number is wrong or the compression
        method is not 8.  All optional header fields are read and discarded.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        # modtime = self.fileobj.read(4)
        # extraflag = self.fileobj.read(1)
        # os = self.fileobj.read(1)
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress data and write it to the underlying file object.

        Raises IOError (EBADF) when the object was opened for reading and
        ValueError when it has already been closed.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write(self.compress.compress(data))
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size bytes of uncompressed data.

        A negative size (the default) reads until end of file.  An empty
        string is returned once no data is left.  Raises IOError (EBADF)
        when the object was opened for writing.
        """
        if self.mode != READ:
            import errno
            # The message used to be a copy of the one in write(); report
            # the operation that actually failed.
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = readsize * 2
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = readsize * 2
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back onto the front of the pending decompressed data."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read size compressed bytes and append their decompressed form.

        Raises EOFError when the underlying file is exhausted (or this
        object is closed); read() relies on that to stop looping.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the pending buffer and update CRC/size."""
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and size stored in the 8-byte member trailer.

        Raises ValueError when either value disagrees with what was
        computed while decompressing.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = U32(read32(self.fileobj))   # may exceed 2GB
        if U32(crc32) != U32(self.crc):
            raise ValueError("CRC check failed")
        elif isize != self.size:
            raise ValueError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file opened by the constructor is
        closed; a caller-supplied fileobj is left open.  Calling close()
        again is a no-op.
        """
        if self.fileobj is not None:
            # Guarding on fileobj makes a second close() harmless; without
            # it the write-mode branch would dereference None.
            if self.mode == WRITE:
                self.fileobj.write(self.compress.flush())
                write32(self.fileobj, self.crc)
                # self.size may exceed 2GB
                write32u(self.fileobj, self.size)
            self.fileobj = None
        if self.myfileobj:
            self.myfileobj.close()
            self.myfileobj = None

    def __del__(self):
        """Close the object on deletion unless it is already closed."""
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # The constructor failed before the attributes were set.
            return
        self.close()

    def flush(self):
        """Flush the underlying file object."""
        self.fileobj.flush()

    def isatty(self):
        """Always return False."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset):
        """Move to the absolute uncompressed position offset.

        In write mode the gap is filled by writing zero bytes, and seeking
        backwards raises IOError.  In read mode a backward seek rewinds to
        the start and reads forward again.
        """
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

    def readline(self, size=-1):
        """Return the next line, including its newline when present.

        At most size bytes are returned when size is non-negative.  An
        empty string is returned at end of file.
        """
        if size < 0:
            size = sys.maxint
        bufs = []
        readsize = min(100, size)    # Read from the file in small chunks
        while True:
            if size == 0:
                return "".join(bufs)    # Return resulting line

            c = self.read(readsize)
            i = c.find('\n')
            if size is not None:
                # We set i=size to break out of the loop under two
                # conditions: 1) there's no newline, and the chunk is
                # larger than size, or 2) there is a newline, but the
                # resulting line would be longer than 'size'.
                if i == -1 and len(c) > size:
                    i = size - 1
                elif size <= i:
                    i = size - 1

            if i >= 0 or c == '':
                bufs.append(c[:i+1])    # Add portion of last chunk
                self._unread(c[i+1:])   # Push back rest of chunk
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return a list of lines read until EOF or until sizehint is used up.

        A sizehint of zero or less reads every remaining line.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every item of the iterable L with write()."""
        for line in L:
            self.write(line)

    def __iter__(self):
        """Return self; iteration yields lines via next()."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of file."""
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration
421
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000422
423def _test():
424 # Act like gzip; with -d, act like gunzip.
425 # The input file is not deleted, however, nor are any other gzip
426 # options or features supported.
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000427 args = sys.argv[1:]
428 decompress = args and args[0] == "-d"
429 if decompress:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000430 args = args[1:]
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000431 if not args:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000432 args = ["-"]
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000433 for arg in args:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000434 if decompress:
435 if arg == "-":
436 f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
437 g = sys.stdout
438 else:
439 if arg[-3:] != ".gz":
440 print "filename doesn't end in .gz:", `arg`
441 continue
442 f = open(arg, "rb")
443 g = __builtin__.open(arg[:-3], "wb")
444 else:
445 if arg == "-":
446 f = sys.stdin
447 g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
448 else:
449 f = __builtin__.open(arg, "rb")
450 g = open(arg + ".gz", "wb")
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000451 while True:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000452 chunk = f.read(1024)
453 if not chunk:
454 break
455 g.write(chunk)
456 if g is not sys.stdout:
457 g.close()
458 if f is not sys.stdin:
459 f.close()
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000460
# Run the command-line driver only when this file is executed as a script.
if __name__ == "__main__":
    _test()