blob: a5d4087f8eb219bd57b16aca6daafa0ab68b5d04 [file] [log] [blame]
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7
Eric S. Raymondee5e61d2001-02-09 09:10:35 +00008import struct, sys, time
Guido van Rossum15262191997-04-30 16:04:57 +00009import zlib
Guido van Rossum68de3791997-07-19 20:22:23 +000010import __builtin__
Guido van Rossum15262191997-04-30 16:04:57 +000011
# Public names exported by a star-import of this module.
__all__ = ["GzipFile", "open"]

# Bit masks tested against the flag byte of a gzip member header.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ = 1
WRITE = 2
17
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative i is taken to be the signed reading of a 32-bit pattern and
    is shifted up by 2**32; a non-negative i is returned unchanged.  On
    Python 2 the result is a long when it does not fit in a plain int.
    """
    if i < 0:
        # No "L" suffix: ints promote to long automatically, and the suffix
        # is a syntax error on Python 3.
        i += 1 << 32
    return i
26
def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int."""
    # No "L" suffix on the mask: it is unnecessary with unified int/long
    # and is a syntax error on Python 3.
    return i & 0xFFFFFFFF
30
def write32(output, value):
    """Write value to output as a 4-byte little-endian signed integer.

    output only needs a write() method; value must fit the struct "<l"
    format.
    """
    packed = struct.pack("<l", value)
    output.write(packed)
Tim Peters07e99cb2001-01-14 23:47:14 +000033
def write32u(output, value):
    """Write value to output as a 4-byte little-endian unsigned integer.

    output only needs a write() method.
    """
    # Original note: the L format writes the bit pattern correctly whether
    # the value is signed or unsigned.
    # NOTE(review): newer struct versions may reject negative values for
    # "<L" -- confirm before passing anything that can be negative.
    packed = struct.pack("<L", value)
    output.write(packed)
38
def read32(input):
    """Read 4 bytes from input and decode them as a little-endian signed int.

    input only needs a read() method.  struct.error propagates when fewer
    than 4 bytes are available.
    """
    raw = input.read(4)
    (value,) = struct.unpack("<l", raw)
    return value
Guido van Rossum15262191997-04-30 16:04:57 +000041
def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    filename is required.  mode defaults to 'rb' and compresslevel
    defaults to 9.  Returns the newly constructed GzipFile instance.

    """
    gzfile = GzipFile(filename, mode, compresslevel)
    return gzfile
50
class GzipFile:
    """File-like object that reads or writes gzip-compressed data.

    The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened by the constructor itself (only a filename was
    # given).  close() closes it.  It stays None when the caller supplied
    # fileobj, in which case the caller's object is never closed here.
    myfileobj = None

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        Raises IOError when the mode starts with anything other than
        'r', 'w' or 'a'.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Decompressed data not yet handed to the caller, and its length.
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate stream, because the gzip header
            # and trailer are written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Position in the *uncompressed* stream, reported by tell().
        self.offset = 0

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        """Return '<gzip ...>' built from the repr of the underlying file."""
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the per-stream state used while writing.

        self.filename always ends in '.gz' afterwards; the header writer
        strips those three characters again to get the stored name.
        """
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file."""
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        fname = self.filename[:-3]                 # name without '.gz'
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(time.time()))  # modification time
        # The reader treats the next two bytes as "extraflag" and "os".
        self.fileobj.write('\002')
        self.fileobj.write('\377')
        if fname:
            self.fileobj.write(fname + '\000')     # null-terminated name

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_gzip_header(self):
        """Consume one gzip member header from the underlying file.

        Raises IOError when the magic bytes or the compression method are
        not the expected ones.  All optional fields are read and discarded.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord( self.fileobj.read(1) )
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord( self.fileobj.read(1) )
        # modtime = self.fileobj.read(4)
        # extraflag = self.fileobj.read(1)
        # os = self.fileobj.read(1)
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            # (16-bit little-endian length followed by that many bytes).
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self,data):
        """Compress data and write it to the underlying file.

        Raises IOError(EBADF) on an object opened for reading and
        ValueError on a closed object.  Empty data is ignored.
        """
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write( self.compress.compress(data) )
            self.offset += len(data)

    def read(self, size=-1):
        """Return up to size bytes of decompressed data.

        A negative size (the default) reads to the end of the stream.
        Returns '' once the object is closed and the buffer is empty.
        Raises IOError(EBADF) on an object opened for writing.
        """
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        # The raw read size doubles on every round trip to the file.
        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = readsize * 2
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = readsize * 2
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push buf back onto the front of the decompressed-data buffer."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read about size compressed bytes and buffer their decompression.

        Raises EOFError when there is nothing more to read.  A CRC or
        length mismatch found at the end of a member raises IOError
        (from _read_eof).
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek( pos ) # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data( uncompress )
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data( uncompress )

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer; update CRC and size."""
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the CRC and size stored in the trailer of a member.

        Raises IOError when either stored value disagrees with what was
        computed while decompressing.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data match the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = U32(read32(self.fileobj))   # may exceed 2GB
        if U32(crc32) != U32(self.crc):
            raise IOError("CRC check failed")
        elif isize != LOWU32(self.size):
            raise IOError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file opened by the constructor is
        closed; a caller-supplied fileobj is left open.  Calling close()
        on an already closed object does nothing.
        """
        if self.fileobj is None:
            # Already closed.  Without this guard a second close() in
            # write mode would try to call write() on None.
            return
        if self.mode == WRITE:
            self.fileobj.write(self.compress.flush())
            write32(self.fileobj, self.crc)
            # self.size may exceed 2GB, or even 4GB
            write32u(self.fileobj, LOWU32(self.size))
            self.fileobj = None
        elif self.mode == READ:
            self.fileobj = None
        if self.myfileobj:
            self.myfileobj.close()
            self.myfileobj = None

    def __del__(self):
        """Close the object on garbage collection if still open."""
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # __init__ did not get far enough to set self.fileobj.
            return
        self.close()

    def flush(self):
        """Push buffered data through to the underlying file.

        In write mode the compressor's pending output is emitted first
        (zlib.Z_SYNC_FLUSH), so that everything written so far can be
        decompressed by a reader; then the underlying file is flushed.
        """
        if self.mode == WRITE:
            # Flushing only the file would leave data inside the compressor.
            self.fileobj.write(self.compress.flush(zlib.Z_SYNC_FLUSH))
        self.fileobj.flush()

    def isatty(self):
        """Return False: a GzipFile is never a terminal."""
        return False

    def tell(self):
        """Return the current position in the uncompressed stream."""
        return self.offset

    def rewind(self):
        """Return the uncompressed stream file position indicator to the
        beginning of the file.

        Only valid in read mode; raises IOError otherwise.
        """
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream.

        whence is 0 (absolute, the default) or 1 (relative to the current
        position); any other value raises ValueError.

        Seeking is emulated.  In write mode only forward seeks are
        possible and the gap is filled with zero bytes (IOError on a
        backward seek).  In read mode a backward seek rewinds and
        re-reads from the start.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

    def readline(self, size=-1):
        """Return the next line, including its newline if one was read.

        At most size bytes are returned when size is non-negative.
        Returns '' at end of file.
        """
        if size < 0: size = sys.maxint
        bufs = []
        readsize = min(100, size)    # Read from the file in small chunks
        while True:
            if size == 0:
                return "".join(bufs) # Return resulting line

            c = self.read(readsize)
            i = c.find('\n')
            # We set i=size-1 to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if i==-1 and len(c) > size: i=size-1
            elif size <= i: i = size -1

            if i >= 0 or c == '':
                bufs.append(c[:i+1])    # Add portion of last chunk
                self._unread(c[i+1:])   # Push back rest of chunk
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return a list of the remaining lines.

        Reading stops early once the lines collected total at least
        sizehint bytes; a sizehint of zero or less reads everything.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write every string in the iterable L, without adding newlines."""
        for line in L:
            self.write(line)

    def __iter__(self):
        """Return self; iteration yields lines via next()."""
        return self

    def next(self):
        """Return the next line, or raise StopIteration at end of file."""
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration
426
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000427
def _test():
    """Tiny command-line driver: act like gzip, or like gunzip with -d.

    Each argument is a file name; "-" (also the default when no names are
    given) means stdin to stdout.  The input file is not deleted, and no
    other gzip options or features are supported.
    """
    names = sys.argv[1:]
    decompress = names and names[0] == "-d"
    if decompress:
        names = names[1:]
    if not names:
        names = ["-"]
    for name in names:
        use_std = (name == "-")
        if decompress:
            if use_std:
                src = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                dst = sys.stdout
            elif name[-3:] != ".gz":
                print("filename doesn't end in .gz:" + " " + repr(name))
                continue
            else:
                src = open(name, "rb")
                dst = __builtin__.open(name[:-3], "wb")
        elif use_std:
            src = sys.stdin
            dst = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
        else:
            src = __builtin__.open(name, "rb")
            dst = open(name + ".gz", "wb")
        # Copy in 1 KiB chunks until the source is exhausted.
        chunk = src.read(1024)
        while chunk:
            dst.write(chunk)
            chunk = src.read(1024)
        # Never close the process-wide standard streams themselves.
        if dst is not sys.stdout:
            dst.close()
        if src is not sys.stdin:
            src.close()
Guido van Rossum51ca6e31997-12-30 20:09:08 +0000465
# Run the command-line driver only when executed as a script.
if __name__ == "__main__":
    _test()