"""
Read and write ZIP files.
"""
import binascii
import cStringIO
import io
import os
import re
import shutil
import stat
import struct
import sys
import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
# Prefer zlib's crc32 when zlib is importable; otherwise fall back to
# binascii.crc32 and record zlib's absence as None so later code can
# test for it before offering deflate compression.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Raised by this module for ZIP data it cannot accept.

    Examples in this file: a CRC-32 mismatch while reading a member, or an
    archive that claims to span multiple disks.
    """
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
28
error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() fall back to the ZIP64
# extra field instead of the 4-byte size fields.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
# The archive comment length is stored in a 2-byte field.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
Martin v. Löwis8c436412008-07-03 12:51:14 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000048structEndArchive = "<4s4H2LH"
49stringEndArchive = "PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwis8c436412008-07-03 12:51:14 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000068stringCentralDir = "PK\001\002"
Martin v. Löwis8c436412008-07-03 12:51:14 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwis8c436412008-07-03 12:51:14 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwis8c436412008-07-03 12:51:14 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000095stringFileHeader = "PK\003\004"
Martin v. Löwis8c436412008-07-03 12:51:14 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwis8c436412008-07-03 12:51:14 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
Martin v. Löwis8c436412008-07-03 12:51:14 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000112structEndArchive64Locator = "<4sLQL"
113stringEndArchive64Locator = "PK\x06\x07"
114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = "PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is an open, seekable file object.  An IOError raised while
    searching is treated as "not a ZIP file" and yields False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Anything with a "read" attribute is treated as an already-open file;
    otherwise the argument is opened as a path in binary mode.  An IOError
    (e.g. the path cannot be opened) results in False rather than an
    exception.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except IOError:
        pass
    return result
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- open, seekable file object for the archive.
    offset -- position of the classic end-of-central-directory record,
              expressed relative to the end of the file (a negative number).
    endrec -- list built by _EndRecData(); updated in place and returned.

    Returns endrec unchanged when no ZIP64 locator/record is present (or the
    data is too short to hold one).  Raises BadZipfile for multi-disk
    archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so this cannot be a
        # ZIP64 archive.  Without this check struct.unpack would raise.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record; keep the values from the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
194
195
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight fields of the ZIP "End of central dir"
    record, followed by the archive comment and, as a tenth item, the file
    seek offset of this record.  The list is passed through _EndRecData64()
    so ZIP64 values override the classic ones when present.

    None is returned when no (complete) record can be found.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check guards struct.unpack against file objects that do
    # not raise on an out-of-range seek and hand back a short tail instead.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to EOF for a full record:
            # the file is truncated/corrupt, so report "no record".
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250
Fred Drake484d7352000-10-02 21:14:52 +0000251
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* stamped with *date_time*.

        date_time is a (year, month, day, hour, min, sec) sequence.
        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field is appended
        and extract_version/create_version are raised to at least 45.
        """
        dt = self.date_time
        # Pack the timestamp into the two 16-bit MS-DOS date/time fields
        # (seconds are stored with 2-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; taking it from
            # extract_version could lower a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are encoded as ASCII when possible; otherwise they are
        encoded as UTF-8 and flag bit 0x800 is set.  Byte-string names are
        passed through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in self.extra to this entry.

        Walks the (type, length, payload) records of the extra field.  For a
        record of type 1, 64-bit values replace file_size, compress_size and
        header_offset, in that order, for each attribute currently holding
        its 0xffffffff placeholder.

        Raises RuntimeError if a type-1 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Each record needs a 4-byte header; a shorter trailing fragment is
        # ignored instead of being fed to struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xffffffff:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
402 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000403
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000405class _ZipDecrypter:
406 """Class to handle decryption of files stored within a ZIP archive.
407
408 ZIP supports a password-based form of encryption. Even though known
409 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000410 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000411
412 Usage:
413 zd = _ZipDecrypter(mypwd)
414 plain_char = zd(cypher_char)
415 plain_text = map(zd, cypher_text)
416 """
417
418 def _GenerateCRCTable():
419 """Generate a CRC-32 table.
420
421 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
422 internal keys. We noticed that a direct implementation is faster than
423 relying on binascii.crc32().
424 """
425 poly = 0xedb88320
426 table = [0] * 256
427 for i in range(256):
428 crc = i
429 for j in range(8):
430 if crc & 1:
431 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
432 else:
433 crc = ((crc >> 1) & 0x7FFFFFFF)
434 table[i] = crc
435 return table
436 crctable = _GenerateCRCTable()
437
438 def _crc32(self, ch, crc):
439 """Compute the CRC32 primitive on one byte."""
440 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
441
442 def __init__(self, pwd):
443 self.key0 = 305419896
444 self.key1 = 591751049
445 self.key2 = 878082192
446 for p in pwd:
447 self._UpdateKeys(p)
448
449 def _UpdateKeys(self, c):
450 self.key0 = self._crc32(c, self.key0)
451 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
452 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
453 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
454
455 def __call__(self, c):
456 """Decrypt a single character."""
457 c = ord(c)
458 k = self.key2 | 2
459 c = c ^ (((k * (k^1)) >> 8) & 255)
460 c = chr(c)
461 self._UpdateKeys(c)
462 return c
463
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Wrap *fileobj*, positioned at the member's data.

        fileobj       -- file object the (compressed) bytes are read from.
        mode          -- mode string; a 'U' in it enables universal newlines
                         in readline().
        zipinfo       -- ZipInfo of the member (compression type, sizes,
                         name and, if present, expected CRC).
        decrypter     -- optional callable applied to every byte read.
        close_fileobj -- if true, close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() needed several read1() calls and the buffer was
                # rebuilt in between, so part of chunk is no longer in it.
                # Put chunk back in front of the unread remainder instead of
                # letting _offset go negative.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Always true: this object only supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC; verify it when *eof* is true.

        Raises BadZipfile on a mismatch with the expected CRC.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                # Drop already-consumed bytes while appending the new data.
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object and, if requested at construction, the
        underlying file object."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
Tim Petersea5962f2007-03-12 18:07:52 +0000660
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000661
R David Murray3f4ccba2012-04-12 18:42:47 -0400662class ZipFile(object):
Tim Petersa19a1682001-03-29 04:36:09 +0000663 """ Class with methods to open, read, write, close, list zip files.
664
Martin v. Löwis8c436412008-07-03 12:51:14 +0000665 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000666
Fred Drake3d9091e2001-03-26 15:49:24 +0000667 file: Either the path to the file, or a file-like object.
668 If it is a path, the file will be opened and closed by ZipFile.
669 mode: The mode can be either read "r", write "w" or append "a".
670 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000671 allowZip64: if True ZipFile will create files with ZIP64 extensions when
672 needed, otherwise it will raise an exception when this would
673 be necessary.
674
Fred Drake3d9091e2001-03-26 15:49:24 +0000675 """
Fred Drake484d7352000-10-02 21:14:52 +0000676
Fred Drake90eac282001-02-28 05:29:34 +0000677 fp = None # Set here since __del__ checks it
678
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (the file is then opened here and closed by
    this object) or a file-like object (used as given and never closed
    by this object).

    Raises RuntimeError for an unsupported mode or compression method,
    or when ZIP_DEFLATED is requested and zlib is not available.
    In mode "r" a BadZipfile from reading the directory propagates.

    If setting up the archive fails for any reason, a file opened here
    is closed again and self.fp is reset to None before the exception
    propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = ''

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # The file could not be opened for update: fall back to
            # creating it, and treat the archive as freshly written.
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Cleanup-and-re-raise: whatever went wrong, do not leave a file
        # we opened ourselves dangling.  _GetContents() may already have
        # closed it and reset self.fp, hence the None check.
        fp, self.fp = self.fp, None
        if fp is not None and not self._filePassed:
            fp.close()
        raise
745
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
748
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block is not suppressed.
    """
    self.close()
751
def _GetContents(self):
    """Read the directory via _RealGetContents().

    When that raises BadZipfile and the file was opened by this object
    (not passed in by the caller), the file is closed and self.fp is
    reset to None before the exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
762
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, stores the archive
    comment and self.start_dir, then appends one ZipInfo per central
    directory entry to self.filelist and indexes it by name in
    self.NameToInfo.

    Raises BadZipfile when no end record is found (or looking for it
    raises IOError), when a central directory record is truncated, or
    when a record does not start with the central directory signature.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory from here on.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        # A short read means the directory is cut off; report it as a
        # bad archive instead of letting struct.unpack fail below.
        if len(centdir) != sizeCentralDir:
            raise BadZipfile("Truncated central directory")
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift by the same amount as the directory start.
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % total)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000828
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000829
def namelist(self):
    """Return a new list with the filename of every entry in
    self.filelist, in the same order."""
    return [entry.filename for entry in self.filelist]
836
def infolist(self):
    """Return the list of ZipInfo instances for the files in the archive.

    This is self.filelist itself, not a copy.
    """
    return self.filelist
841
def printdir(self):
    """Print a table of contents for the zip file: a header line, then
    one line per entry with its name, modification time and size."""
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row)
848
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose open or read raises
    BadZipfile, or None when every member reads cleanly.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            f = self.open(zinfo.filename, "r")
            try:
                while f.read(chunk_size):     # Check CRC-32
                    pass
            finally:
                # Close each member stream as soon as it has been read,
                # whether or not reading it succeeded.
                f.close()
        except BadZipfile:
            return zinfo.filename
861
def getinfo(self, name):
    """Return the ZipInfo stored under 'name' in self.NameToInfo.

    Raises KeyError when the archive has no item with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000870
def setpassword(self, pwd):
    """Store 'pwd' as the default password for encrypted files."""
    self.pwd = pwd
874
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only a comment strictly longer than ZIP_MAX_COMMENT needs
    # truncating; one of exactly that length is stored unchanged and
    # must not trigger the "too long" message.
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as changed so close() writes the new comment.
    self._didModify = True
890
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    Opens the member in mode "r" with the given password and returns
    everything its read() yields.
    """
    member_stream = self.open(name, "r", pwd)
    return member_stream.read()
894
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo instance; mode must be "r", "U"
    or "rU"; pwd is the password for an encrypted member (self.pwd is
    used when it is not given).

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password, KeyError (via getinfo) for an unknown
    name, and BadZipfile when the local file header is truncated, has a
    bad signature, or names a different file than the directory does.

    When the archive was opened from a path, a separate file handle is
    opened for the member; it is closed again if anything fails before
    the member stream is handed back.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        should_close = False
    else:
        zef_file = open(self.filename, 'rb')
        should_close = True

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        # A short read would otherwise surface as struct.error below.
        if len(fheader) != sizeFileHeader:
            raise BadZipfile("Truncated file header")
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, or the MSB of the file time depending on the
            # header type and is used to check the correctness of the
            # password.
            enc_header = zef_file.read(12)
            h = map(zd, enc_header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=should_close)
    except:
        # Cleanup-and-re-raise: the handle opened above belongs to this
        # call until a ZipExtFile takes it over, so release it on any
        # failure.  A caller-supplied file object is left alone.
        if should_close:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000966
def extract(self, member, path=None, pwd=None):
    """Extract one member and return what _extract_member() returns.

    `member' may be a filename, which is looked up with getinfo(), or a
    ZipInfo object.  The member is extracted under `path', or under the
    current working directory when `path' is None.  `pwd' is passed
    through unchanged.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
980
def extractall(self, path=None, members=None, pwd=None):
    """Call extract() for every requested member.

    `members' defaults to everything namelist() returns; `path' and
    `pwd' are handed to extract() unchanged for each member.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
992
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Returns the normalized path that was created.  A member whose name
    ends in '/' is created as a directory; anything else is copied out
    of the archive into a file, creating parent directories as needed.

    Member names come from the archive and are not trusted: a drive
    prefix, leading separators and any '.' or '..' components are
    dropped, so the result always lies below targetpath.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with
    # platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Interpret an absolute name as relative and discard empty, '.' and
    # '..' components so the member cannot climb out of targetpath.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        part for part in arcname.split(os.path.sep)
        if part not in ('', os.path.curdir, os.path.pardir))

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # Slicing keeps an empty member name from raising IndexError here.
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Close both ends even when the copy fails part-way.
    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
1029
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Prints a notice for a duplicate name when self.debug is set.
    Raises RuntimeError when the archive is not in mode "w" or "a", is
    already closed, or the compression type is unsupported or needs the
    missing zlib module.  Raises LargeZipFile when the file size or the
    header offset exceeds ZIP64_LIMIT and ZIP64 is not allowed.
    """
    is_duplicate = zinfo.filename in self.NameToInfo
    if is_duplicate and self.debug:      # Warning for duplicate names
        print("Duplicate name: %s" % zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001052
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    arcname defaults to filename; its drive and leading separators are
    removed, and a directory gets a trailing '/'.  compress_type
    defaults to the archive's own compression setting.  Raises
    RuntimeError when the archive is already closed, plus whatever
    _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # A directory entry is a header with no data behind it.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = checksum = 0
        zinfo.compress_size = packed_size = 0
        zinfo.file_size = raw_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_size += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                packed_size += len(chunk)
            self.fp.write(chunk)
    if cmpr:
        tail = cmpr.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = raw_size
    zinfo.CRC = checksum
    zinfo.file_size = raw_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1133
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time, the archive's compression setting and external_attr set to
    0600 << 16.  A compress_type that is not None overrides the
    entry's compression.  Raises RuntimeError when the archive is
    already closed, plus whatever _writecheck() raises.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 checksum

    payload = bytes
    if zinfo.compress_type == ZIP_DEFLATED:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        payload = deflater.compress(payload) + deflater.flush()
        zinfo.compress_size = len(payload)  # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    # The offset is read a second time here, exactly as before.
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self.fp.write(zinfo.FileHeader())
    self.fp.write(payload)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        trailer = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size)
        self.fp.write(trailer)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1176
def __del__(self):
    """Finalizer: call close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001180
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  When the archive
    is in mode "w" or "a" and was modified, the central directory, the
    ZIP64 end records (if the counts, size or offset require them) and
    the end-of-archive record with the archive comment are written.

    self.fp is always reset to None, and a file opened by this object
    is always closed, even when writing those records fails; the
    failure then propagates to the caller.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the fixed-width fields go into
                # a ZIP64 extra field; the fixed field gets 0xffffffff.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version,
                        zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values handed to struct.pack, then let
                    # the warning propagate.
                    sys.stderr.write("%s\n" % ((
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version,
                        zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type,
                        dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # Clamp to what the classic end record can hold.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset,
                                 len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Drop the handle first so a later close() (for instance from
        # __del__) is a no-op instead of retrying a write that failed.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1283
1284
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        add the package's modules and recurse into every subdirectory
        that is itself a package.  If pathname is a plain directory,
        add every *.py module found directly inside it.  Otherwise
        pathname must be a Python *.py file and that single module is
        added.  What is stored is always the compiled module.pyo or
        module.pyc; module.py is compiled to module.pyc if necessary.

        basename, when non-empty, is prepended (followed by "/") to the
        archive name of everything added.

        Raises RuntimeError if pathname is not a directory and does not
        end with ".py".
        """
        if not os.path.isdir(pathname):
            # Single module: only *.py sources are accepted.
            if not pathname.endswith(".py"):
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    self._add_module(os.path.join(pathname, filename),
                                     basename)
            return

        # This is a package directory: its own name becomes part of the
        # archive prefix for everything below it.
        name = os.path.split(pathname)[1]
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        self._add_module(initname, basename)

        dirlist = os.listdir(pathname)
        # __init__.py has just been added; do not add it a second time.
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                self._add_module(path, basename)

    def _add_module(self, file_py, basename, label="Adding"):
        """Write the compiled form of the source file file_py to the archive.

        file_py must end with ".py".  label is the prefix of the message
        printed when self.debug is set.
        """
        fname, arcname = self._get_codename(file_py[0:-3], basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (no extension), return the path of the
        compiled file to store and its name inside the archive, compiling
        if necessary.  For example, given /python/lib/string, return
        (/python/lib/string.pyc, string.pyc); with a non-empty basename
        the archive name becomes basename/string.pyc.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if (os.path.isfile(file_pyo) and
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
            fname = file_pyo    # Use .pyo file
        else:
            if (not os.path.isfile(file_pyc) or
                    os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime):
                # The .pyc is missing or older than the source: rebuild it.
                import py_compile
                if self.debug:
                    print("Compiling %s" % file_py)
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    # Compile errors are reported but not re-raised.
                    # NOTE(review): fname still names the .pyc afterwards,
                    # which may not exist if compilation failed.
                    print(err.msg)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001382
1383
def main(args=None):
    """Run the zipfile command-line interface.

    args is the argument list (defaults to sys.argv[1:]).  The first
    argument selects the action:

        -l zipfile.zip           list the archive
        -t zipfile.zip           test the archive
        -e zipfile.zip target    extract the archive into target
        -c zipfile.zip src ...   create the archive from files/directories

    On a missing/unknown action or a wrong number of arguments the usage
    text is printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # Close the archive here as every other action does.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined to the target directory
            # without sanitizing; absolute names or ".." components in an
            # untrusted archive can end up outside "out".
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when "out" is empty and the member has no
                # directory part; there is nothing to create in that case.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: it has been created above and must
                    # not be opened as a file.
                    continue
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add file path as zippath, or recurse into directory path."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1457
# Run the command-line interface only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()