blob: 3a44d3a9de43b7f46520e682c8dae255ce9ef8c6 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
Fred Drake5db246d2000-09-29 20:44:48 +000019class BadZipfile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020 pass
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
23class LargeZipFile(Exception):
Tim Petersa608bb22006-06-15 18:06:29 +000024 """
Ronald Oussoren143cefb2006-06-15 08:14:18 +000025 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
26 and those extensions are disabled.
27 """
28
Tim Peterse1190062001-01-15 03:34:38 +000029error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +000031ZIP64_LIMIT = (1 << 31) - 1
Martin v. Löwis8c436412008-07-03 12:51:14 +000032ZIP_FILECOUNT_LIMIT = 1 << 16
33ZIP_MAX_COMMENT = (1 << 16) - 1
Ronald Oussoren143cefb2006-06-15 08:14:18 +000034
Guido van Rossum32abe6f2000-03-31 17:30:02 +000035# constants for Zip file compression methods
36ZIP_STORED = 0
37ZIP_DEFLATED = 8
38# Other ZIP compression methods not supported
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
Martin v. Löwis8c436412008-07-03 12:51:14 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000048structEndArchive = "<4s4H2LH"
49stringEndArchive = "PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwis8c436412008-07-03 12:51:14 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000068stringCentralDir = "PK\001\002"
Martin v. Löwis8c436412008-07-03 12:51:14 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwis8c436412008-07-03 12:51:14 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwis8c436412008-07-03 12:51:14 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000095stringFileHeader = "PK\003\004"
Martin v. Löwis8c436412008-07-03 12:51:14 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwis8c436412008-07-03 12:51:14 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
Martin v. Löwis8c436412008-07-03 12:51:14 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000112structEndArchive64Locator = "<4sLQL"
113stringEndArchive64Locator = "PK\x06\x07"
114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = "PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
Antoine Pitrou6f193e02008-12-27 15:43:12 +0000133def _check_zipfile(fp):
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000134 try:
Antoine Pitrou6f193e02008-12-27 15:43:12 +0000135 if _EndRecData(fp):
136 return True # file has correct magic number
Fred Drake7e473802001-05-11 19:52:57 +0000137 except IOError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000138 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000139 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
Antoine Pitrou6f193e02008-12-27 15:43:12 +0000141def is_zipfile(filename):
142 """Quickly see if a file is a ZIP file by checking the magic number.
143
144 The filename argument may be a file or file-like object too.
145 """
146 result = False
147 try:
148 if hasattr(filename, "read"):
149 result = _check_zipfile(fp=filename)
150 else:
151 with open(filename, "rb") as fp:
152 result = _check_zipfile(fp)
153 except IOError:
154 pass
155 return result
156
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000157def _EndRecData64(fpin, offset, endrec):
158 """
159 Read the ZIP64 end-of-archive records and use that to update endrec
160 """
Martin v. Löwis8c436412008-07-03 12:51:14 +0000161 fpin.seek(offset - sizeEndCentDir64Locator, 2)
162 data = fpin.read(sizeEndCentDir64Locator)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000163 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
164 if sig != stringEndArchive64Locator:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000165 return endrec
166
167 if diskno != 0 or disks != 1:
168 raise BadZipfile("zipfiles that span multiple disks are not supported")
169
Tim Petersa608bb22006-06-15 18:06:29 +0000170 # Assume no 'zip64 extensible data'
Martin v. Löwis8c436412008-07-03 12:51:14 +0000171 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
172 data = fpin.read(sizeEndCentDir64)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000173 sig, sz, create_version, read_version, disk_num, disk_dir, \
174 dircount, dircount2, dirsize, diroffset = \
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000175 struct.unpack(structEndArchive64, data)
176 if sig != stringEndArchive64:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000177 return endrec
178
179 # Update the original endrec using data from the ZIP64 record
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000180 endrec[_ECD_SIGNATURE] = sig
Martin v. Löwis8c436412008-07-03 12:51:14 +0000181 endrec[_ECD_DISK_NUMBER] = disk_num
182 endrec[_ECD_DISK_START] = disk_dir
183 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
184 endrec[_ECD_ENTRIES_TOTAL] = dircount2
185 endrec[_ECD_SIZE] = dirsize
186 endrec[_ECD_OFFSET] = diroffset
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000187 return endrec
188
189
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000190def _EndRecData(fpin):
191 """Return data from the "End of Central Directory" record, or None.
192
193 The data is a list of the nine items in the ZIP "End of central dir"
194 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwis8c436412008-07-03 12:51:14 +0000195
196 # Determine file size
197 fpin.seek(0, 2)
198 filesize = fpin.tell()
199
200 # Check to see if this is ZIP file with no archive comment (the
201 # "end of central directory" structure should be the last item in the
202 # file if this is the case).
Amaury Forgeot d'Arc3e5b0272009-07-28 22:15:30 +0000203 try:
204 fpin.seek(-sizeEndCentDir, 2)
205 except IOError:
206 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000207 data = fpin.read()
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000208 if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
Martin v. Löwis8c436412008-07-03 12:51:14 +0000209 # the signature is correct and there's no comment, unpack structure
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000210 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000211 endrec=list(endrec)
212
213 # Append a blank comment and record start offset
214 endrec.append("")
215 endrec.append(filesize - sizeEndCentDir)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000216
Amaury Forgeot d'Arc2407ac92009-01-17 22:43:50 +0000217 # Try to read the "Zip64 end of central directory" structure
218 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000219
220 # Either this is not a ZIP file, or it is a ZIP file with an archive
221 # comment. Search the end of the file for the "end of central directory"
222 # record signature. The comment is the last item in the ZIP file and may be
223 # up to 64K long. It is assumed that the "end of central directory" magic
224 # number does not appear in the comment.
225 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
226 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000227 data = fpin.read()
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000228 start = data.rfind(stringEndArchive)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000229 if start >= 0:
230 # found the magic number; attempt to unpack and interpret
231 recData = data[start:start+sizeEndCentDir]
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000232 endrec = list(struct.unpack(structEndArchive, recData))
Martin v. Löwis8c436412008-07-03 12:51:14 +0000233 comment = data[start+sizeEndCentDir:]
234 # check that comment length is correct
235 if endrec[_ECD_COMMENT_SIZE] == len(comment):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000236 # Append the archive comment and start offset
237 endrec.append(comment)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000238 endrec.append(maxCommentStart + start)
Amaury Forgeot d'Arc2407ac92009-01-17 22:43:50 +0000239
240 # Try to read the "Zip64 end of central directory" structure
241 return _EndRecData64(fpin, maxCommentStart + start - filesize,
242 endrec)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000243
244 # Unable to find a valid end of central directory structure
245 return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246
Fred Drake484d7352000-10-02 21:14:52 +0000247
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000248class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000249 """Class with attributes describing each file in the ZIP archive."""
250
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000251 __slots__ = (
252 'orig_filename',
253 'filename',
254 'date_time',
255 'compress_type',
256 'comment',
257 'extra',
258 'create_system',
259 'create_version',
260 'extract_version',
261 'reserved',
262 'flag_bits',
263 'volume',
264 'internal_attr',
265 'external_attr',
266 'header_offset',
267 'CRC',
268 'compress_size',
269 'file_size',
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000270 '_raw_time',
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000271 )
272
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000273 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000274 self.orig_filename = filename # Original file name in archive
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000275
276 # Terminate the file name at the first null byte. Null bytes in file
277 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000278 null_byte = filename.find(chr(0))
279 if null_byte >= 0:
280 filename = filename[0:null_byte]
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000281 # This is used to ensure paths in generated ZIP files always use
282 # forward slashes as the directory separator, as required by the
283 # ZIP format specification.
284 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000285 filename = filename.replace(os.sep, "/")
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000286
Greg Ward8e36d282003-06-18 00:53:06 +0000287 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000288 self.date_time = date_time # year, month, day, hour, min, sec
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000289 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000290 self.compress_type = ZIP_STORED # Type of compression for the file
291 self.comment = "" # Comment for each file
292 self.extra = "" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000293 if sys.platform == 'win32':
294 self.create_system = 0 # System which created ZIP archive
295 else:
296 # Assume everything else is unix-y
297 self.create_system = 3 # System which created ZIP archive
Tim Peterse1190062001-01-15 03:34:38 +0000298 self.create_version = 20 # Version which created ZIP archive
299 self.extract_version = 20 # Version needed to extract archive
300 self.reserved = 0 # Must be zero
301 self.flag_bits = 0 # ZIP flag bits
302 self.volume = 0 # Volume number of file header
303 self.internal_attr = 0 # Internal attributes
304 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000305 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000306 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000307 # CRC CRC-32 of the uncompressed file
308 # compress_size Size of the compressed file
309 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000310
311 def FileHeader(self):
Fred Drake484d7352000-10-02 21:14:52 +0000312 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000313 dt = self.date_time
314 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000315 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000316 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000317 # Set these to zero because we write them after the file data
318 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000319 else:
Tim Peterse1190062001-01-15 03:34:38 +0000320 CRC = self.CRC
321 compress_size = self.compress_size
322 file_size = self.file_size
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000323
324 extra = self.extra
325
326 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
327 # File is larger than what fits into a 4 byte integer,
328 # fall back to the ZIP64 extension
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000329 fmt = '<HHQQ'
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000330 extra = extra + struct.pack(fmt,
331 1, struct.calcsize(fmt)-4, file_size, compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000332 file_size = 0xffffffff
333 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000334 self.extract_version = max(45, self.extract_version)
335 self.create_version = max(45, self.extract_version)
336
Martin v. Löwis471617d2008-05-05 17:16:58 +0000337 filename, flag_bits = self._encodeFilenameFlags()
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000338 header = struct.pack(structFileHeader, stringFileHeader,
Martin v. Löwis471617d2008-05-05 17:16:58 +0000339 self.extract_version, self.reserved, flag_bits,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000340 self.compress_type, dostime, dosdate, CRC,
341 compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +0000342 len(filename), len(extra))
343 return header + filename + extra
344
345 def _encodeFilenameFlags(self):
346 if isinstance(self.filename, unicode):
347 try:
348 return self.filename.encode('ascii'), self.flag_bits
349 except UnicodeEncodeError:
350 return self.filename.encode('utf-8'), self.flag_bits | 0x800
351 else:
352 return self.filename, self.flag_bits
353
354 def _decodeFilename(self):
355 if self.flag_bits & 0x800:
356 return self.filename.decode('utf-8')
357 else:
358 return self.filename
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000359
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000360 def _decodeExtra(self):
361 # Try to decode the extra field.
362 extra = self.extra
363 unpack = struct.unpack
364 while extra:
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000365 tp, ln = unpack('<HH', extra[:4])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000366 if tp == 1:
367 if ln >= 24:
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000368 counts = unpack('<QQQ', extra[4:28])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000369 elif ln == 16:
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000370 counts = unpack('<QQ', extra[4:20])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000371 elif ln == 8:
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000372 counts = unpack('<Q', extra[4:12])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000373 elif ln == 0:
374 counts = ()
375 else:
376 raise RuntimeError, "Corrupt extra field %s"%(ln,)
377
378 idx = 0
379
380 # ZIP64 extension (large files and/or large archives)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +0000381 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000382 self.file_size = counts[idx]
383 idx += 1
384
Martin v. Löwis8c436412008-07-03 12:51:14 +0000385 if self.compress_size == 0xFFFFFFFFL:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000386 self.compress_size = counts[idx]
387 idx += 1
388
Martin v. Löwis8c436412008-07-03 12:51:14 +0000389 if self.header_offset == 0xffffffffL:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000390 old = self.header_offset
391 self.header_offset = counts[idx]
392 idx+=1
393
394 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000395
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000396
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000397class _ZipDecrypter:
398 """Class to handle decryption of files stored within a ZIP archive.
399
400 ZIP supports a password-based form of encryption. Even though known
401 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000402 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000403
404 Usage:
405 zd = _ZipDecrypter(mypwd)
406 plain_char = zd(cypher_char)
407 plain_text = map(zd, cypher_text)
408 """
409
410 def _GenerateCRCTable():
411 """Generate a CRC-32 table.
412
413 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
414 internal keys. We noticed that a direct implementation is faster than
415 relying on binascii.crc32().
416 """
417 poly = 0xedb88320
418 table = [0] * 256
419 for i in range(256):
420 crc = i
421 for j in range(8):
422 if crc & 1:
423 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
424 else:
425 crc = ((crc >> 1) & 0x7FFFFFFF)
426 table[i] = crc
427 return table
428 crctable = _GenerateCRCTable()
429
430 def _crc32(self, ch, crc):
431 """Compute the CRC32 primitive on one byte."""
432 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
433
434 def __init__(self, pwd):
435 self.key0 = 305419896
436 self.key1 = 591751049
437 self.key2 = 878082192
438 for p in pwd:
439 self._UpdateKeys(p)
440
441 def _UpdateKeys(self, c):
442 self.key0 = self._crc32(c, self.key0)
443 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
444 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
445 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
446
447 def __call__(self, c):
448 """Decrypt a single character."""
449 c = ord(c)
450 k = self.key2 | 2
451 c = c ^ (((k * (k^1)) >> 8) & 255)
452 c = chr(c)
453 self._UpdateKeys(c)
454 return c
455
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000456class ZipExtFile(io.BufferedIOBase):
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000457 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000458 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000459 """
Tim Petersea5962f2007-03-12 18:07:52 +0000460
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000461 # Max size supported by decompressor.
462 MAX_N = 1 << 31 - 1
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000463
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000464 # Read from compressed files in 4k blocks.
465 MIN_READ_SIZE = 4096
Tim Petersea5962f2007-03-12 18:07:52 +0000466
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000467 # Search for universal newlines or line chunks.
468 PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
469
470 def __init__(self, fileobj, mode, zipinfo, decrypter=None):
471 self._fileobj = fileobj
472 self._decrypter = decrypter
473
Ezio Melotti4611b052010-01-28 01:41:30 +0000474 self._compress_type = zipinfo.compress_type
475 self._compress_size = zipinfo.compress_size
476 self._compress_left = zipinfo.compress_size
477
478 if self._compress_type == ZIP_DEFLATED:
479 self._decompressor = zlib.decompressobj(-15)
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000480 self._unconsumed = ''
481
482 self._readbuffer = ''
483 self._offset = 0
484
485 self._universal = 'U' in mode
486 self.newlines = None
487
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000488 # Adjust read size for encrypted files since the first 12 bytes
489 # are for the encryption/password information.
490 if self._decrypter is not None:
491 self._compress_left -= 12
492
493 self.mode = mode
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000494 self.name = zipinfo.filename
495
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000496 def readline(self, limit=-1):
497 """Read and return a line from the stream.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000498
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000499 If limit is specified, at most limit bytes will be read.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000500 """
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000501
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000502 if not self._universal and limit < 0:
503 # Shortcut common case - newline found in buffer.
504 i = self._readbuffer.find('\n', self._offset) + 1
505 if i > 0:
506 line = self._readbuffer[self._offset: i]
507 self._offset = i
508 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000509
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000510 if not self._universal:
511 return io.BufferedIOBase.readline(self, limit)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000512
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000513 line = ''
514 while limit < 0 or len(line) < limit:
515 readahead = self.peek(2)
516 if readahead == '':
517 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000518
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000519 #
520 # Search for universal newlines or line chunks.
521 #
522 # The pattern returns either a line chunk or a newline, but not
523 # both. Combined with peek(2), we are assured that the sequence
524 # '\r\n' is always retrieved completely and never split into
525 # separate newlines - '\r', '\n' due to coincidental readaheads.
526 #
527 match = self.PATTERN.search(readahead)
528 newline = match.group('newline')
529 if newline is not None:
530 if self.newlines is None:
531 self.newlines = []
532 if newline not in self.newlines:
533 self.newlines.append(newline)
534 self._offset += len(newline)
535 return line + '\n'
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000536
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000537 chunk = match.group('chunk')
538 if limit >= 0:
539 chunk = chunk[: limit - len(line)]
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000540
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000541 self._offset += len(chunk)
542 line += chunk
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000543
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000544 return line
545
546 def peek(self, n=1):
547 """Returns buffered bytes without advancing the position."""
548 if n > len(self._readbuffer) - self._offset:
549 chunk = self.read(n)
550 self._offset -= len(chunk)
551
552 # Return up to 512 bytes to reduce allocation overhead for tight loops.
553 return self._readbuffer[self._offset: self._offset + 512]
554
555 def readable(self):
556 return True
557
558 def read(self, n=-1):
559 """Read and return up to n bytes.
560 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000561 """
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000562
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000563 buf = ''
564 while n < 0 or n is None or n > len(buf):
565 data = self.read1(n)
566 if len(data) == 0:
567 return buf
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000568
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000569 buf += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000570
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000571 return buf
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000572
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000573 def read1(self, n):
574 """Read up to n bytes with at most one read() system call."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000575
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000576 # Simplify algorithm (branching) by transforming negative n to large n.
577 if n < 0 or n is None:
578 n = self.MAX_N
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000579
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000580 # Bytes available in read buffer.
581 len_readbuffer = len(self._readbuffer) - self._offset
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000582
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000583 # Read from file.
584 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
585 nbytes = n - len_readbuffer - len(self._unconsumed)
586 nbytes = max(nbytes, self.MIN_READ_SIZE)
587 nbytes = min(nbytes, self._compress_left)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000588
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000589 data = self._fileobj.read(nbytes)
590 self._compress_left -= len(data)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000591
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000592 if data and self._decrypter is not None:
593 data = ''.join(map(self._decrypter, data))
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000594
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000595 if self._compress_type == ZIP_STORED:
596 self._readbuffer = self._readbuffer[self._offset:] + data
597 self._offset = 0
598 else:
599 # Prepare deflated bytes for decompression.
600 self._unconsumed += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000601
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000602 # Handle unconsumed data.
Ezio Melotti4611b052010-01-28 01:41:30 +0000603 if (len(self._unconsumed) > 0 and n > len_readbuffer and
604 self._compress_type == ZIP_DEFLATED):
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000605 data = self._decompressor.decompress(
606 self._unconsumed,
607 max(n - len_readbuffer, self.MIN_READ_SIZE)
608 )
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000609
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000610 self._unconsumed = self._decompressor.unconsumed_tail
611 if len(self._unconsumed) == 0 and self._compress_left == 0:
612 data += self._decompressor.flush()
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000613
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000614 self._readbuffer = self._readbuffer[self._offset:] + data
615 self._offset = 0
616
617 # Read from buffer.
618 data = self._readbuffer[self._offset: self._offset + n]
619 self._offset += len(data)
620 return data
621
Tim Petersea5962f2007-03-12 18:07:52 +0000622
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000623
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000624class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000625 """ Class with methods to open, read, write, close, list zip files.
626
Martin v. Löwis8c436412008-07-03 12:51:14 +0000627 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000628
Fred Drake3d9091e2001-03-26 15:49:24 +0000629 file: Either the path to the file, or a file-like object.
630 If it is a path, the file will be opened and closed by ZipFile.
631 mode: The mode can be either read "r", write "w" or append "a".
632 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000633 allowZip64: if True ZipFile will create files with ZIP64 extensions when
634 needed, otherwise it will raise an exception when this would
635 be necessary.
636
Fred Drake3d9091e2001-03-26 15:49:24 +0000637 """
Fred Drake484d7352000-10-02 21:14:52 +0000638
Fred Drake90eac282001-02-28 05:29:34 +0000639 fp = None # Set here since __del__ checks it
640
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000641 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000642 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000643 if mode not in ("r", "w", "a"):
644 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
645
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000646 if compression == ZIP_STORED:
647 pass
648 elif compression == ZIP_DEFLATED:
649 if not zlib:
650 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000651 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000652 else:
653 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000654
655 self._allowZip64 = allowZip64
656 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000657 self.debug = 0 # Level of printing: 0 through 3
658 self.NameToInfo = {} # Find file info given name
659 self.filelist = [] # List of ZipInfo instances for archive
660 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000661 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000662 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000663 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000664
Fred Drake3d9091e2001-03-26 15:49:24 +0000665 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000666 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000667 self._filePassed = 0
668 self.filename = file
669 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000670 try:
671 self.fp = open(file, modeDict[mode])
672 except IOError:
673 if mode == 'a':
674 mode = key = 'w'
675 self.fp = open(file, modeDict[mode])
676 else:
677 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000678 else:
679 self._filePassed = 1
680 self.fp = file
681 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000682
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000683 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000684 self._GetContents()
685 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000686 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000687 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000688 try: # See if file is a zip file
689 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000690 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000691 self.fp.seek(self.start_dir, 0)
692 except BadZipfile: # file is not a zip file, just append
693 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000694 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000695 if not self._filePassed:
696 self.fp.close()
697 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000698 raise RuntimeError, 'Mode must be "r", "w" or "a"'
699
Ezio Melotti569e61f2009-12-30 06:14:51 +0000700 def __enter__(self):
701 return self
702
703 def __exit__(self, type, value, traceback):
704 self.close()
705
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000706 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000707 """Read the directory, making sure we close the file if the format
708 is bad."""
709 try:
710 self._RealGetContents()
711 except BadZipfile:
712 if not self._filePassed:
713 self.fp.close()
714 self.fp = None
715 raise
716
717 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000718 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000719 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000720 endrec = _EndRecData(fp)
721 if not endrec:
722 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000723 if self.debug > 1:
724 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000725 size_cd = endrec[_ECD_SIZE] # bytes in central directory
726 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
727 self.comment = endrec[_ECD_COMMENT] # archive comment
728
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000729 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000730 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000731 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
732 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000733 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
734
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000735 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000736 inferred = concat + offset_cd
737 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000738 # self.start_dir: Position of start of central directory
739 self.start_dir = offset_cd + concat
740 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000741 data = fp.read(size_cd)
742 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743 total = 0
744 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000745 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000746 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000747 raise BadZipfile, "Bad magic number for central directory"
748 centdir = struct.unpack(structCentralDir, centdir)
749 if self.debug > 2:
750 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000751 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000752 # Create ZipInfo instance to store file information
753 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000754 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
755 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000756 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000757 (x.create_version, x.create_system, x.extract_version, x.reserved,
758 x.flag_bits, x.compress_type, t, d,
759 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
760 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
761 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000762 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000763 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000764 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000765
766 x._decodeExtra()
767 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000768 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000769 self.filelist.append(x)
770 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000771
772 # update total bytes read from central directory
773 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
774 + centdir[_CD_EXTRA_FIELD_LENGTH]
775 + centdir[_CD_COMMENT_LENGTH])
776
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000777 if self.debug > 2:
778 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000779
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000780
781 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000782 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783 l = []
784 for data in self.filelist:
785 l.append(data.filename)
786 return l
787
788 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000789 """Return a list of class ZipInfo instances for files in the
790 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000791 return self.filelist
792
793 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000794 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000795 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
796 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000797 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000798 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
799
800 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000801 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000802 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000803 for zinfo in self.filelist:
804 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000805 # Read by chunks, to avoid an OverflowError or a
806 # MemoryError with very large embedded files.
807 f = self.open(zinfo.filename, "r")
808 while f.read(chunk_size): # Check CRC-32
809 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000810 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000811 return zinfo.filename
812
813 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000814 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000815 info = self.NameToInfo.get(name)
816 if info is None:
817 raise KeyError(
818 'There is no item named %r in the archive' % name)
819
820 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000821
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000822 def setpassword(self, pwd):
823 """Set default password for encrypted files."""
824 self.pwd = pwd
825
826 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000827 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000828 return self.open(name, "r", pwd).read()
829
830 def open(self, name, mode="r", pwd=None):
831 """Return file-like object for 'name'."""
832 if mode not in ("r", "U", "rU"):
833 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000834 if not self.fp:
835 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000836 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000837
Tim Petersea5962f2007-03-12 18:07:52 +0000838 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000839 # given a file object in the constructor
840 if self._filePassed:
841 zef_file = self.fp
842 else:
843 zef_file = open(self.filename, 'rb')
844
Georg Brandl112aa502008-05-20 08:25:48 +0000845 # Make sure we have an info object
846 if isinstance(name, ZipInfo):
847 # 'name' is already an info object
848 zinfo = name
849 else:
850 # Get info object for name
851 zinfo = self.getinfo(name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000852
853 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000854
855 # Skip the file header:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000856 fheader = zef_file.read(sizeFileHeader)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000857 if fheader[0:4] != stringFileHeader:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000858 raise BadZipfile, "Bad magic number for file header"
859
860 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000861 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000862 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000863 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000864
865 if fname != zinfo.orig_filename:
866 raise BadZipfile, \
867 'File name in directory "%s" and header "%s" differ.' % (
868 zinfo.orig_filename, fname)
869
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000870 # check for encrypted flag & handle password
871 is_encrypted = zinfo.flag_bits & 0x1
872 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000873 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000874 if not pwd:
875 pwd = self.pwd
876 if not pwd:
877 raise RuntimeError, "File %s is encrypted, " \
878 "password required for extraction" % name
879
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000880 zd = _ZipDecrypter(pwd)
881 # The first 12 bytes in the cypher stream is an encryption header
882 # used to strengthen the algorithm. The first 11 bytes are
883 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000884 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000885 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000886 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000887 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000888 if zinfo.flag_bits & 0x8:
889 # compare against the file type from extended local headers
890 check_byte = (zinfo._raw_time >> 8) & 0xff
891 else:
892 # compare against the CRC otherwise
893 check_byte = (zinfo.CRC >> 24) & 0xff
894 if ord(h[11]) != check_byte:
895 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000896
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000897 return ZipExtFile(zef_file, mode, zinfo, zd)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000898
Georg Brandl62416bc2008-01-07 18:47:44 +0000899 def extract(self, member, path=None, pwd=None):
900 """Extract a member from the archive to the current working directory,
901 using its full name. Its file information is extracted as accurately
902 as possible. `member' may be a filename or a ZipInfo object. You can
903 specify a different directory using `path'.
904 """
905 if not isinstance(member, ZipInfo):
906 member = self.getinfo(member)
907
908 if path is None:
909 path = os.getcwd()
910
911 return self._extract_member(member, path, pwd)
912
913 def extractall(self, path=None, members=None, pwd=None):
914 """Extract all members from the archive to the current working
915 directory. `path' specifies a different directory to extract to.
916 `members' is optional and must be a subset of the list returned
917 by namelist().
918 """
919 if members is None:
920 members = self.namelist()
921
922 for zipinfo in members:
923 self.extract(zipinfo, path, pwd)
924
925 def _extract_member(self, member, targetpath, pwd):
926 """Extract the ZipInfo object 'member' to a physical
927 file on the path targetpath.
928 """
929 # build the destination pathname, replacing
930 # forward slashes to platform specific separators.
Antoine Pitrou97377bf2009-05-04 21:17:17 +0000931 # Strip trailing path separator, unless it represents the root.
932 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
933 and len(os.path.splitdrive(targetpath)[1]) > 1):
Georg Brandl62416bc2008-01-07 18:47:44 +0000934 targetpath = targetpath[:-1]
935
936 # don't include leading "/" from file name if present
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000937 if member.filename[0] == '/':
Georg Brandl62416bc2008-01-07 18:47:44 +0000938 targetpath = os.path.join(targetpath, member.filename[1:])
939 else:
940 targetpath = os.path.join(targetpath, member.filename)
941
942 targetpath = os.path.normpath(targetpath)
943
944 # Create all upper directories if necessary.
945 upperdirs = os.path.dirname(targetpath)
946 if upperdirs and not os.path.exists(upperdirs):
947 os.makedirs(upperdirs)
948
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000949 if member.filename[-1] == '/':
Martin v. Löwis0b09c422009-05-24 19:30:52 +0000950 if not os.path.isdir(targetpath):
951 os.mkdir(targetpath)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000952 return targetpath
953
Georg Brandl112aa502008-05-20 08:25:48 +0000954 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000955 target = file(targetpath, "wb")
956 shutil.copyfileobj(source, target)
957 source.close()
958 target.close()
959
960 return targetpath
961
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000962 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000963 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000964 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000965 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000966 print "Duplicate name:", zinfo.filename
967 if self.mode not in ("w", "a"):
968 raise RuntimeError, 'write() requires mode "w" or "a"'
969 if not self.fp:
970 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000971 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000972 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
973 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000974 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000975 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
976 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000977 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000978 if zinfo.file_size > ZIP64_LIMIT:
979 if not self._allowZip64:
980 raise LargeZipFile("Filesize would require ZIP64 extensions")
981 if zinfo.header_offset > ZIP64_LIMIT:
982 if not self._allowZip64:
983 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000984
985 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000986 """Put the bytes from filename into the archive under the name
987 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000988 if not self.fp:
989 raise RuntimeError(
990 "Attempt to write to ZIP archive that was already closed")
991
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000992 st = os.stat(filename)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000993 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000994 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000995 date_time = mtime[0:6]
996 # Create ZipInfo instance to store file information
997 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000998 arcname = filename
999 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1000 while arcname[0] in (os.sep, os.altsep):
1001 arcname = arcname[1:]
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001002 if isdir:
1003 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001004 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001005 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001006 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001007 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001008 else:
Tim Peterse1190062001-01-15 03:34:38 +00001009 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001010
1011 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001012 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001013 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001014
1015 self._writecheck(zinfo)
1016 self._didModify = True
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001017
1018 if isdir:
1019 zinfo.file_size = 0
1020 zinfo.compress_size = 0
1021 zinfo.CRC = 0
1022 self.filelist.append(zinfo)
1023 self.NameToInfo[zinfo.filename] = zinfo
1024 self.fp.write(zinfo.FileHeader())
1025 return
1026
Benjamin Petersonb91e8ed2009-05-10 02:29:00 +00001027 with open(filename, "rb") as fp:
1028 # Must overwrite CRC and sizes with correct data later
1029 zinfo.CRC = CRC = 0
1030 zinfo.compress_size = compress_size = 0
1031 zinfo.file_size = file_size = 0
1032 self.fp.write(zinfo.FileHeader())
1033 if zinfo.compress_type == ZIP_DEFLATED:
1034 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1035 zlib.DEFLATED, -15)
1036 else:
1037 cmpr = None
1038 while 1:
1039 buf = fp.read(1024 * 8)
1040 if not buf:
1041 break
1042 file_size = file_size + len(buf)
1043 CRC = crc32(buf, CRC) & 0xffffffff
1044 if cmpr:
1045 buf = cmpr.compress(buf)
1046 compress_size = compress_size + len(buf)
1047 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001048 if cmpr:
1049 buf = cmpr.flush()
1050 compress_size = compress_size + len(buf)
1051 self.fp.write(buf)
1052 zinfo.compress_size = compress_size
1053 else:
1054 zinfo.compress_size = file_size
1055 zinfo.CRC = CRC
1056 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001057 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001058 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001059 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001060 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001061 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001062 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001063 self.filelist.append(zinfo)
1064 self.NameToInfo[zinfo.filename] = zinfo
1065
Just van Rossumb083cb32002-12-12 12:23:32 +00001066 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +00001067 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001068 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1069 the name of the file in the archive."""
1070 if not isinstance(zinfo_or_arcname, ZipInfo):
1071 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001072 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001073 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001074 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001075 else:
1076 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001077
1078 if not self.fp:
1079 raise RuntimeError(
1080 "Attempt to write to ZIP archive that was already closed")
1081
Tim Peterse1190062001-01-15 03:34:38 +00001082 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001083 zinfo.header_offset = self.fp.tell() # Start of header bytes
1084 self._writecheck(zinfo)
1085 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001086 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001087 if zinfo.compress_type == ZIP_DEFLATED:
1088 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1089 zlib.DEFLATED, -15)
1090 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001091 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001092 else:
1093 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001094 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001095 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001096 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001097 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001098 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001099 # Write CRC and file sizes after the file data
Gregory P. Smith26627332009-06-26 07:50:21 +00001100 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001101 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001102 self.filelist.append(zinfo)
1103 self.NameToInfo[zinfo.filename] = zinfo
1104
1105 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001106 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001107 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001108
1109 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001110 """Close the file, and for mode "w" and "a" write the ending
1111 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001112 if self.fp is None:
1113 return
Tim Petersa608bb22006-06-15 18:06:29 +00001114
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001115 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001116 count = 0
1117 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001118 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001119 count = count + 1
1120 dt = zinfo.date_time
1121 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001122 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001123 extra = []
1124 if zinfo.file_size > ZIP64_LIMIT \
1125 or zinfo.compress_size > ZIP64_LIMIT:
1126 extra.append(zinfo.file_size)
1127 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001128 file_size = 0xffffffff
1129 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001130 else:
1131 file_size = zinfo.file_size
1132 compress_size = zinfo.compress_size
1133
1134 if zinfo.header_offset > ZIP64_LIMIT:
1135 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001136 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001137 else:
1138 header_offset = zinfo.header_offset
1139
1140 extra_data = zinfo.extra
1141 if extra:
1142 # Append a ZIP64 field to the extra's
1143 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001144 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001145 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001146
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001147 extract_version = max(45, zinfo.extract_version)
1148 create_version = max(45, zinfo.create_version)
1149 else:
1150 extract_version = zinfo.extract_version
1151 create_version = zinfo.create_version
1152
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001153 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001154 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001155 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001156 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001157 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001158 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001159 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001160 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001161 0, zinfo.internal_attr, zinfo.external_attr,
1162 header_offset)
1163 except DeprecationWarning:
1164 print >>sys.stderr, (structCentralDir,
1165 stringCentralDir, create_version,
1166 zinfo.create_system, extract_version, zinfo.reserved,
1167 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1168 zinfo.CRC, compress_size, file_size,
1169 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1170 0, zinfo.internal_attr, zinfo.external_attr,
1171 header_offset)
1172 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001173 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001174 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001175 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001176 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001177
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001178 pos2 = self.fp.tell()
1179 # Write end-of-zip-archive record
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001180 centDirCount = count
1181 centDirSize = pos2 - pos1
Martin v. Löwis8c436412008-07-03 12:51:14 +00001182 centDirOffset = pos1
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001183 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1184 centDirOffset > ZIP64_LIMIT or
1185 centDirSize > ZIP64_LIMIT):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001186 # Need to write the ZIP64 end-of-archive records
1187 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001188 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001189 44, 45, 45, 0, 0, centDirCount, centDirCount,
1190 centDirSize, centDirOffset)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001191 self.fp.write(zip64endrec)
1192
1193 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001194 structEndArchive64Locator,
1195 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001196 self.fp.write(zip64locrec)
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001197 centDirCount = min(centDirCount, 0xFFFF)
1198 centDirSize = min(centDirSize, 0xFFFFFFFF)
1199 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001200
Martin v. Löwis8c436412008-07-03 12:51:14 +00001201 # check for valid comment length
1202 if len(self.comment) >= ZIP_MAX_COMMENT:
1203 if self.debug > 0:
1204 msg = 'Archive comment is too long; truncating to %d bytes' \
1205 % ZIP_MAX_COMMENT
1206 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001207
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001208 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001209 0, 0, centDirCount, centDirCount,
1210 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwis8c436412008-07-03 12:51:14 +00001211 self.fp.write(endrec)
1212 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001213 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001214
Fred Drake3d9091e2001-03-26 15:49:24 +00001215 if not self._filePassed:
1216 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001217 self.fp = None
1218
1219
1220class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001221 """Class to create ZIP archives with Python library files and packages."""
1222
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001223 def writepy(self, pathname, basename = ""):
1224 """Add all files from "pathname" to the ZIP archive.
1225
Fred Drake484d7352000-10-02 21:14:52 +00001226 If pathname is a package directory, search the directory and
1227 all package subdirectories recursively for all *.py and enter
1228 the modules into the archive. If pathname is a plain
1229 directory, listdir *.py and enter all modules. Else, pathname
1230 must be a Python *.py file and the module will be put into the
1231 archive. Added modules are always module.pyo or module.pyc.
1232 This method will compile the module.py into module.pyc if
1233 necessary.
1234 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001235 dir, name = os.path.split(pathname)
1236 if os.path.isdir(pathname):
1237 initname = os.path.join(pathname, "__init__.py")
1238 if os.path.isfile(initname):
1239 # This is a package directory, add it
1240 if basename:
1241 basename = "%s/%s" % (basename, name)
1242 else:
1243 basename = name
1244 if self.debug:
1245 print "Adding package in", pathname, "as", basename
1246 fname, arcname = self._get_codename(initname[0:-3], basename)
1247 if self.debug:
1248 print "Adding", arcname
1249 self.write(fname, arcname)
1250 dirlist = os.listdir(pathname)
1251 dirlist.remove("__init__.py")
1252 # Add all *.py files and package subdirectories
1253 for filename in dirlist:
1254 path = os.path.join(pathname, filename)
1255 root, ext = os.path.splitext(filename)
1256 if os.path.isdir(path):
1257 if os.path.isfile(os.path.join(path, "__init__.py")):
1258 # This is a package directory, add it
1259 self.writepy(path, basename) # Recursive call
1260 elif ext == ".py":
1261 fname, arcname = self._get_codename(path[0:-3],
1262 basename)
1263 if self.debug:
1264 print "Adding", arcname
1265 self.write(fname, arcname)
1266 else:
1267 # This is NOT a package directory, add its files at top level
1268 if self.debug:
1269 print "Adding files from directory", pathname
1270 for filename in os.listdir(pathname):
1271 path = os.path.join(pathname, filename)
1272 root, ext = os.path.splitext(filename)
1273 if ext == ".py":
1274 fname, arcname = self._get_codename(path[0:-3],
1275 basename)
1276 if self.debug:
1277 print "Adding", arcname
1278 self.write(fname, arcname)
1279 else:
1280 if pathname[-3:] != ".py":
1281 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001282 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001283 fname, arcname = self._get_codename(pathname[0:-3], basename)
1284 if self.debug:
1285 print "Adding file", arcname
1286 self.write(fname, arcname)
1287
1288 def _get_codename(self, pathname, basename):
1289 """Return (filename, archivename) for the path.
1290
Fred Drake484d7352000-10-02 21:14:52 +00001291 Given a module name path, return the correct file path and
1292 archive name, compiling if necessary. For example, given
1293 /python/lib/string, return (/python/lib/string.pyc, string).
1294 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001295 file_py = pathname + ".py"
1296 file_pyc = pathname + ".pyc"
1297 file_pyo = pathname + ".pyo"
1298 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001299 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001300 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001301 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001302 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001303 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001304 if self.debug:
1305 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001306 try:
1307 py_compile.compile(file_py, file_pyc, None, True)
1308 except py_compile.PyCompileError,err:
1309 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001310 fname = file_pyc
1311 else:
1312 fname = file_pyc
1313 archivename = os.path.split(fname)[1]
1314 if basename:
1315 archivename = "%s/%s" % (basename, archivename)
1316 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001317
1318
1319def main(args = None):
1320 import textwrap
1321 USAGE=textwrap.dedent("""\
1322 Usage:
1323 zipfile.py -l zipfile.zip # Show listing of a zipfile
1324 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1325 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1326 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1327 """)
1328 if args is None:
1329 args = sys.argv[1:]
1330
1331 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
1332 print USAGE
1333 sys.exit(1)
1334
1335 if args[0] == '-l':
1336 if len(args) != 2:
1337 print USAGE
1338 sys.exit(1)
1339 zf = ZipFile(args[1], 'r')
1340 zf.printdir()
1341 zf.close()
1342
1343 elif args[0] == '-t':
1344 if len(args) != 2:
1345 print USAGE
1346 sys.exit(1)
1347 zf = ZipFile(args[1], 'r')
1348 zf.testzip()
1349 print "Done testing"
1350
1351 elif args[0] == '-e':
1352 if len(args) != 3:
1353 print USAGE
1354 sys.exit(1)
1355
1356 zf = ZipFile(args[1], 'r')
1357 out = args[2]
1358 for path in zf.namelist():
Tim Petersa608bb22006-06-15 18:06:29 +00001359 if path.startswith('./'):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001360 tgt = os.path.join(out, path[2:])
1361 else:
1362 tgt = os.path.join(out, path)
1363
1364 tgtdir = os.path.dirname(tgt)
1365 if not os.path.exists(tgtdir):
1366 os.makedirs(tgtdir)
Benjamin Petersonb91e8ed2009-05-10 02:29:00 +00001367 with open(tgt, 'wb') as fp:
1368 fp.write(zf.read(path))
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001369 zf.close()
1370
1371 elif args[0] == '-c':
1372 if len(args) < 3:
1373 print USAGE
1374 sys.exit(1)
1375
1376 def addToZip(zf, path, zippath):
1377 if os.path.isfile(path):
1378 zf.write(path, zippath, ZIP_DEFLATED)
1379 elif os.path.isdir(path):
1380 for nm in os.listdir(path):
Tim Petersa608bb22006-06-15 18:06:29 +00001381 addToZip(zf,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001382 os.path.join(path, nm), os.path.join(zippath, nm))
Tim Petersa608bb22006-06-15 18:06:29 +00001383 # else: ignore
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001384
1385 zf = ZipFile(args[1], 'w', allowZip64=True)
1386 for src in args[2:]:
1387 addToZip(zf, src, os.path.basename(src))
1388
1389 zf.close()
1390
1391if __name__ == "__main__":
1392 main()