blob: 6cff722fd161d0666a086b022626d38ab1e59cea [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Martin v. Löwis00756902006-02-05 17:09:41 +00006import struct, os, time, sys
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000011except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000012 zlib = None
13
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file does not have the structure of a ZIP archive."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Historical alias: the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Largest value stored without falling back to the ZIP64 extension.
ZIP64_LIMIT = 0x7FFFFFFF

# Compression method identifiers; no other ZIP methods are supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats used to read the headers, each paired with the magic
# number that starts the corresponding record.

# End of archive record: 9 items, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = b"PK\x05\x06"

# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4HlLL5HLl"
stringCentralDir = b"PK\x01\x02"

# Local file header: 12 items, 30 bytes.
structFileHeader = "<4s2B4HlLL2H"
stringFileHeader = b"PK\x03\x04"

# Zip64 end-of-archive locator: 4 items, 20 bytes.
structEndArchive64Locator = "<4slql"
stringEndArchive64Locator = b"PK\x06\x07"

# Zip64 end-of-archive record: 10 items, 56 bytes.
structEndArchive64 = "<4sqhhllqqqq"
stringEndArchive64 = b"PK\x06\x06"
Thomas Wouters0e3f5912006-08-11 14:57:12 +000047
Guido van Rossum32abe6f2000-03-31 17:30:02 +000048
# Positions of the fields in a tuple unpacked with structCentralDir
# (central directory structure), in record order.
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,             # is this meaningful?
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# Positions of the fields in a tuple unpacked with structFileHeader
# (local file header structure), in record order.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,             # is this meaningful?
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record is found, False when it is not or
    when the file cannot be opened or read (IOError).
    """
    try:
        fpin = io.open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Close the handle even when the lookup raises, so a failed
            # probe does not leak the open file.
            fpin.close()
    except IOError:
        return False
    return bool(endrec)             # a record means a correct magic number
Guido van Rossum32abe6f2000-03-31 17:30:02 +000095
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the position of the classic
    end-of-archive record relative to the end of the file, and endrec is
    the list built from that record.  endrec is returned unchanged when
    no ZIP64 locator/record is found in front of it; otherwise its items
    1 through 6 are overwritten with the ZIP64 values.

    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    fpin.seek(offset - locatorSize, 2)
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        # Not enough bytes for a locator: unpacking would raise
        # struct.error, so treat it as "no ZIP64 data".
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    fpin.seek(offset - locatorSize - endArchiveSize, 2)
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    (The archive comment, as bytes, is appended just before that offset.)

    fpin must be a seekable file object opened in binary mode.  None is
    returned when no valid record is found, including when the file is
    too short to hold one.
    """
    # Measure the file first: seeking to -22 in a file shorter than the
    # fixed-size record fails or clamps, giving a bogus size.
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < 22:
        return None

    fpin.seek(-22, 2)               # Assume no archive comment.
    data = fpin.read()
    # The file is read in binary mode, so compare against bytes.
    if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append(b"")              # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:                  # Correct signature string was found
        record = data[start:start+22]
        if len(record) != 22:
            # Signature too close to the end of the file to be followed
            # by a complete record.
            return None
        endrec = list(struct.unpack(structEndArchive, record))
        comment = data[start+22:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None                     # Error, return None
166 return # Error, return None
167
Fred Drake484d7352000-10-02 21:14:52 +0000168
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe an archive member called filename.

        filename is the member name (str); date_time is a tuple of
        (year, month, day, hour, min, sec).
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the
        UTF-8 encoded file name and the extra data.  When a size exceeds
        ZIP64_LIMIT a ZIP64 extra record is appended and
        extract_version/create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value so that a higher
            # existing create_version is not replaced by extract_version.
            self.create_version = max(45, self.create_version)

        # Encode once and pack the length of the encoded bytes: the
        # header stores a byte count, which differs from the character
        # count for non-ASCII names.
        filename = self.filename.encode("utf-8")
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 record it holds.

        Walks the (type, length, data) records in self.extra.  For a
        ZIP64 record (type 1) the 64-bit values replace, in order,
        file_size, compress_size and header_offset wherever the current
        value is the -1 / 0xFFFFFFFF placeholder.

        Raises RuntimeError when a ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record needs a 4 byte header; a shorter trailing fragment
        # cannot be a record and is ignored.
        while len(extra) >= 4:
            # Type and length are unsigned 16 bit values.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000299
300
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    for low-level security.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for seed in range(256):
            value = seed
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= polynomial
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys with the byte value c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (summed * 0x08088405 + 1) & mask32
        top_byte = (self.key1 >> 24) & 0xFF
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = c ^ keystream
        self._UpdateKeys(plain)
        return plain
358
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, positioned at the member data described by zipinfo.

        decrypt, when given, is a callable applied to every byte read
        from fileobj before decompression.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''        # data read but not yet decrypted/inflated
        self.readbuffer = b''       # processed data not yet handed out
        self.linebuffer = b''       # data held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''      # last line separator readline() removed

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")
        else:
            self.nlSeps = (b"\n", )

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object closed; the underlying file is left untouched."""
        self.closed = True

    def _checkfornewline(self):
        """Locate the first line separator in the line buffer.

        Returns (position, separator length), or (-1, -1) when the
        buffer holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            # Slice rather than index: indexing bytes yields an int,
            # which never compares equal to b'\n'.
            if self.lastdiscard == b'\r' and self.linebuffer[:1] == b'\n':
                self.linebuffer = self.linebuffer[1:]
                # The pair is complete now; a following \n is a real
                # line break and must not be dropped as well.
                self.lastdiscard = b''

            # Choose the separator that occurs earliest in the buffer.
            # On a tie the one listed first wins, so b"\r\n" is
            # preferred over a lone b"\r" at the same position.
            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        The line separator is always returned as b"\\n", except for a
        final incomplete line or a line cut short by size.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The limit falls before the separator: hand out only
                # the requested bytes and keep the rest, separator
                # included, for the next call.
                chunk = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                return chunk
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size bytes of member data as bytes.

        With size None or negative, everything that remains is returned.
        """
        # act like file() obj and return empty bytes if size is 0
        if size == 0:
            return b''
        if size is not None and size < 0:
            # A negative size means "no limit"; normalising it keeps the
            # final slice from cutting off the last byte.
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type == ZIP_DEFLATED:
                newdata = self.dc.decompress(newdata)
                self.rawbuffer = self.dc.unconsumed_tail
                # NOTE(review): nothing in this class sets self.eof to
                # True, so this flush looks unreachable -- confirm.
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    newdata += self.dc.flush()
                    # prevent decompressor from being used again
                    self.dc = None

            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556
557
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000558class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000559 """ Class with methods to open, read, write, close, list zip files.
560
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000562
Fred Drake3d9091e2001-03-26 15:49:24 +0000563 file: Either the path to the file, or a file-like object.
564 If it is a path, the file will be opened and closed by ZipFile.
565 mode: The mode can be either read "r", write "w" or append "a".
566 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000567 allowZip64: if True ZipFile will create files with ZIP64 extensions when
568 needed, otherwise it will raise an exception when this would
569 be necessary.
570
Fred Drake3d9091e2001-03-26 15:49:24 +0000571 """
Fred Drake484d7352000-10-02 21:14:52 +0000572
Fred Drake90eac282001-02-28 05:29:34 +0000573 fp = None # Set here since __del__ checks it
574
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000575 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000576 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000577 self._allowZip64 = allowZip64
578 self._didModify = False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000579 if compression == ZIP_STORED:
580 pass
581 elif compression == ZIP_DEFLATED:
582 if not zlib:
583 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000584 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000585 else:
586 raise RuntimeError, "That compression method is not supported"
Tim Peterse1190062001-01-15 03:34:38 +0000587 self.debug = 0 # Level of printing: 0 through 3
588 self.NameToInfo = {} # Find file info given name
589 self.filelist = [] # List of ZipInfo instances for archive
590 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000591 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000592 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000593
Fred Drake3d9091e2001-03-26 15:49:24 +0000594 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000595 if isinstance(file, basestring):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000596 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000597 self._filePassed = 0
598 self.filename = file
599 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000600 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000601 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000602 except IOError:
603 if mode == 'a':
604 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000605 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000606 else:
607 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000608 else:
609 self._filePassed = 1
610 self.fp = file
611 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000612
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000613 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000614 self._GetContents()
615 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000616 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000617 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000618 try: # See if file is a zip file
619 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000620 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000621 self.fp.seek(self.start_dir, 0)
622 except BadZipfile: # file is not a zip file, just append
623 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000624 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000625 if not self._filePassed:
626 self.fp.close()
627 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000628 raise RuntimeError, 'Mode must be "r", "w" or "a"'
629
630 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000631 """Read the directory, making sure we close the file if the format
632 is bad."""
633 try:
634 self._RealGetContents()
635 except BadZipfile:
636 if not self._filePassed:
637 self.fp.close()
638 self.fp = None
639 raise
640
641 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000642 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000643 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000644 endrec = _EndRecData(fp)
645 if not endrec:
646 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000647 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000648 print(endrec)
Tim Peterse1190062001-01-15 03:34:38 +0000649 size_cd = endrec[5] # bytes in central directory
650 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000651 self.comment = endrec[8] # archive comment
652 # endrec[9] is the offset of the "End of Central Dir" record
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000653 if endrec[9] > ZIP64_LIMIT:
654 x = endrec[9] - size_cd - 56 - 20
655 else:
656 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000657 # "concat" is zero, unless zip was concatenated to another file
658 concat = x - offset_cd
659 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000660 print("given, inferred, offset", offset_cd, x, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000661 # self.start_dir: Position of start of central directory
662 self.start_dir = offset_cd + concat
663 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000664 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000665 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000666 total = 0
667 while total < size_cd:
668 centdir = fp.read(46)
669 total = total + 46
670 if centdir[0:4] != stringCentralDir:
671 raise BadZipfile, "Bad magic number for central directory"
672 centdir = struct.unpack(structCentralDir, centdir)
673 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000674 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000675 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000676 # Create ZipInfo instance to store file information
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000677 x = ZipInfo(str(filename))
Fred Drake3e038e52001-02-28 17:56:26 +0000678 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
679 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
680 total = (total + centdir[_CD_FILENAME_LENGTH]
681 + centdir[_CD_EXTRA_FIELD_LENGTH]
682 + centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000683 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000684 (x.create_version, x.create_system, x.extract_version, x.reserved,
685 x.flag_bits, x.compress_type, t, d,
686 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
687 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
688 # Convert date/time code to (year, month, day, hour, min, sec)
689 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000690 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000691
692 x._decodeExtra()
693 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000694 self.filelist.append(x)
695 self.NameToInfo[x.filename] = x
696 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000697 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000698
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000699
700 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000701 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000702 l = []
703 for data in self.filelist:
704 l.append(data.filename)
705 return l
706
707 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000708 """Return a list of class ZipInfo instances for files in the
709 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000710 return self.filelist
711
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    A header row is written first, then one row per entry of
    self.filelist showing its name, modification time and file_size.
    Output goes to 'file', or to sys.stdout when 'file' is None.
    """
    out = sys.stdout if file is None else file
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=out)
    for member in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % member.date_time
        row = "%-46s %s %12d" % (member.filename, stamp, member.file_size)
        print(row, file=out)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722
723 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000724 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000725 for zinfo in self.filelist:
726 try:
Tim Peterse1190062001-01-15 03:34:38 +0000727 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000728 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000729 return zinfo.filename
730
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000731
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    The lookup goes through self.NameToInfo; an unknown name propagates
    that mapping's lookup error.
    """
    info = self.NameToInfo[name]
    return info
735
def setpassword(self, pwd):
    """Set the default password used for encrypted files.

    'pwd' must be a bytes object; this is checked with an assertion
    before it is stored on self.pwd.
    """
    assert isinstance(pwd, bytes)
    self.pwd = pwd
740
def read(self, name, pwd=None):
    """Return the full contents of the member 'name'.

    The member is opened through self.open() in mode "r" with the given
    password and read in one call.
    """
    member_file = self.open(name, "r", pwd)
    return member_file.read()
744
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'mode' must be "r", "U" or "rU"; when it contains "U", universal
    newlines are switched on in the returned object.  'pwd' is the
    password for an encrypted member and falls back to self.pwd.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a wrong password, and BadZipfile when the local
    file header has a bad magic number or a file name that differs from
    the central directory.  If this method opened its own file handle,
    that handle is closed again before any error propagates.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Get info object for name
        zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename.encode("utf-8"):
            raise BadZipfile(
                'File name in directory %r and header %r differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            # Build a real list: map() gives an iterator on Python 3, which
            # cannot be indexed below.
            h = [zd(c) for c in enc_header[0:12]]
            if h[11] != ((zinfo.CRC >> 24) & 255):
                raise RuntimeError("Bad password for file %s" % name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
    except BaseException:
        # Do not leak the handle we opened ourselves; a file object
        # handed in by the caller stays under the caller's control.
        if not self._filePassed:
            zef_file.close()
        raise
    return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000812
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    A duplicate member name only produces a printed warning, and only
    when self.debug is set.  RuntimeError is raised when the archive is
    not in mode "w" or "a", is already closed, or the compression type
    is unavailable or unsupported.  LargeZipFile is raised when the file
    size or header offset exceeds ZIP64_LIMIT while self._allowZip64 is
    false.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        print("Duplicate name:", zinfo.filename)  # Warning for duplicate names
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000835
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; its drive part and any leading
    path separators are removed.  'compress_type' defaults to
    self.compression.  The local header is first written with zeroed
    CRC and sizes, then patched in place once the data has been
    streamed, so self.fp must support seek().

    Errors reported by self._writecheck() propagate unchanged.  The
    source file is closed even when reading or writing fails.
    """
    st = os.stat(filename)
    date_time = time.localtime(st.st_mtime)[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    # The with-block guarantees the source file is closed on every path.
    with io.open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    # Pack the CRC as an unsigned field: masking yields the same four
    # bytes for a signed value and also accepts values >= 2**31.
    self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
                              zinfo.compress_size, zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
900
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time and self.compression.  Errors reported by
    self._writecheck() propagate unchanged.  If bit 0x08 of
    zinfo.flag_bits is set, the CRC and sizes are written again after
    the file data.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time()))
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname
    zinfo.file_size = len(bytes)            # Uncompressed size
    # Nothing is written between here and the header below, so the
    # offset is recorded once.
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC is
        # masked and packed unsigned: same bytes for a signed value, and
        # values >= 2**31 no longer overflow the format.
        self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
                                  zinfo.compress_size, zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
933
def __del__(self):
    """Call close() on deletion, in case the user forgot to."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000937
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an archive whose fp is already None does
    nothing.  For a modified archive in mode "w" or "a" the central
    directory and the end-of-archive record are written first, with
    ZIP64 records added when sizes or offsets exceed ZIP64_LIMIT.
    self.fp is always reset to None, and closed unless the file object
    was supplied by the caller, even if writing the records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff  # -1
                    compress_size = 0xffffffff  # -1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format:  32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<hh' + 'q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                # Encode the name once so the length stored in the record
                # is the number of bytes written, not the number of
                # characters (they differ for non-ASCII names).
                filename = zinfo.filename.encode("utf-8")
                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Release the handle on every path so a failed write does not
        # leave the archive half-open.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1023
1024
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        Three cases are handled:

        * pathname is a directory containing __init__.py: it is treated
          as a package; its *.py files are added and its package
          subdirectories are processed recursively.
        * pathname is any other directory: the *.py files directly
          inside it are added.
        * otherwise pathname must end in ".py" and that single module is
          added; anything else raises RuntimeError.

        Modules are stored as the .pyo or .pyc file chosen by
        _get_codename(), which compiles the source when necessary.
        """
        parent, name = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                path = os.path.join(pathname, entry)
                root, ext = os.path.splitext(entry)
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for entry in entries:
            path = os.path.join(pathname, entry)
            root, ext = os.path.splitext(entry)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                fname, arcname = self._get_codename(path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module path without extension, pick the bytecode file to
        store, compiling the source when necessary.  For example,
        /python/lib/string yields
        ('/python/lib/string.pyc', 'string.pyc'), and the archive name
        is prefixed with "basename/" when basename is non-empty.

        An up-to-date .pyo file is preferred.  Otherwise the .pyc file
        is used, and is (re)compiled first when it is missing or older
        than the source.  A compile error is printed, not raised.
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        if os.path.isfile(optimized) and \
                os.stat(optimized).st_mtime >= os.stat(source).st_mtime:
            fname = optimized       # Use .pyo file
        else:
            fname = compiled
            stale = (not os.path.isfile(compiled) or
                     os.stat(compiled).st_mtime < os.stat(source).st_mtime)
            if stale:
                import py_compile
                if self.debug:
                    print("Compiling", source)
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001122
1123
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults
    to sys.argv[1:].  The first argument selects the action (-l, -t, -e
    or -c).  On a missing or unknown action, or a wrong number of
    arguments, the usage text is printed and sys.exit(1) is called.
    Archives and output files are closed even when an action fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the first failing member name; report it
        # instead of discarding the result.
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined onto the target
            # unchecked, so an archive holding absolute or ".." paths can
            # write outside 'out'.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # An empty tgtdir means "current directory"; makedirs('')
                # would raise.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                # Read first so a failing member leaves no empty file.
                data = zf.read(path)
                with io.open(tgt, 'wb') as fp:
                    fp.write(data)
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1196
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()