blob: a70a1c952935240fc2b2a2d6293a880722181eb0 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
# zlib is optional.  When it cannot be imported the module records that by
# setting zlib to None (ZipFile() then refuses ZIP_DEFLATED) and computes
# CRC-32 values with binascii instead.
try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32

# Public names of this module (what "from <module> import *" provides).
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """The exception raised by this module for ZIP data it cannot handle,
    for example archives that span multiple disks."""
    pass
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires the ZIP64 extensions
    while those extensions are disabled.
    """
28
error = BadZipfile      # The exception raised by this module

# Largest size (2**31 - 1) that ZipInfo.FileHeader() stores directly in the
# header; larger sizes are written through a ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry count beyond which ZIP64 is required;
# its use is not visible in this part of the file -- confirm.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value that fits the 16-bit comment length field of the
# "end of central directory" record.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Little-endian layout: 4-byte signature, four 16-bit fields, two 32-bit
# fields and a 16-bit comment length -- 22 bytes in total.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the sequence unpacked with structEndArchive
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData() appends the comment and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
64
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# The little-endian format below describes 19 fields occupying 46 bytes.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
# (positions in the tuple unpacked with structCentralDir)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
91
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# The little-endian format below describes 12 fields occupying 30 bytes;
# ZipInfo.FileHeader() appends the file name and the extra field to it.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in the tuple unpacked with structFileHeader
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
110
# The "Zip64 end of central directory locator" structure, magic number, and size
# (4-byte signature, 32-bit disk number, 64-bit relative offset and 32-bit
# disk count: 20 bytes)
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# The little-endian format below describes 10 fields occupying 56 bytes.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in the tuple unpacked with structEndArchive64
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
131_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Tell whether the open file object *fp* looks like a ZIP archive.

    Returns True when _EndRecData() finds an "end of central directory"
    record (i.e. the magic number is present), and False when it finds
    none or when reading *fp* raises IOError.
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    if end_record:
        return True         # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a "read" attribute is examined directly, anything else is opened
    as a path in binary mode.  An IOError while opening or reading yields
    False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed again.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable file object positioned anywhere in the archive
    offset -- position of the classic "end of central directory" record,
              expressed relative to the end of the file (zero or negative)
    endrec -- list built by _EndRecData(); updated in place when a ZIP64
              record is found

    Returns endrec, unchanged when the file holds no (complete) ZIP64
    locator/record.  Raises BadZipfile for archives that span several disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator (e.g. an empty archive of just 22 bytes),
        # so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator in front of the record.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values of the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
188
189
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete, self-consistent record is found:
    the file is too short, the magic number is missing, the record is
    truncated, or the stored comment length does not match the data that
    follows the record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test protects struct.unpack() against file objects that
    # clamp an out-of-range seek instead of raising IOError.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The magic number sits too close to the end of the file for a
            # whole record to follow it: not a usable ZIP file.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246
Fred Drake484d7352000-10-02 21:14:52 +0000247
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe an archive member called *filename*.

        filename  -- member name; cut at the first null byte and normalized
                     to use "/" as the directory separator
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed "local file header" followed by the
        (encoded) file name and the extra field.  When a size exceeds
        ZIP64_LIMIT a ZIP64 extra field is appended and, as a side effect,
        extract_version and create_version are each raised to at least 45.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit DOS date and time fields
        # (years count from 1980, seconds are stored in units of two).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            # Raise each version independently; deriving create_version from
            # extract_version could lower a create_version that was already
            # above 45.
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        Unicode names are written as ASCII when possible; otherwise they
        are encoded as UTF-8 and flag bit 0x800 is set.  Byte-string names
        are passed through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 when flag bit 0x800
        is set and unchanged otherwise."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 information found in self.extra.

        The extra data is a sequence of (type, length, payload) fields.  For
        a ZIP64 field (type 1) the 64-bit values replace, in this order,
        file_size, compress_size and header_offset -- but only those that
        currently hold the "see ZIP64" marker value.  Fewer than four
        trailing bytes cannot form a field header and are ignored.

        Raises RuntimeError when a ZIP64 field has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A field header is four bytes; anything shorter cannot be parsed.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000395
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000396
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000397class _ZipDecrypter:
398 """Class to handle decryption of files stored within a ZIP archive.
399
400 ZIP supports a password-based form of encryption. Even though known
401 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000402 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000403
404 Usage:
405 zd = _ZipDecrypter(mypwd)
406 plain_char = zd(cypher_char)
407 plain_text = map(zd, cypher_text)
408 """
409
410 def _GenerateCRCTable():
411 """Generate a CRC-32 table.
412
413 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
414 internal keys. We noticed that a direct implementation is faster than
415 relying on binascii.crc32().
416 """
417 poly = 0xedb88320
418 table = [0] * 256
419 for i in range(256):
420 crc = i
421 for j in range(8):
422 if crc & 1:
423 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
424 else:
425 crc = ((crc >> 1) & 0x7FFFFFFF)
426 table[i] = crc
427 return table
428 crctable = _GenerateCRCTable()
429
430 def _crc32(self, ch, crc):
431 """Compute the CRC32 primitive on one byte."""
432 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
433
434 def __init__(self, pwd):
435 self.key0 = 305419896
436 self.key1 = 591751049
437 self.key2 = 878082192
438 for p in pwd:
439 self._UpdateKeys(p)
440
441 def _UpdateKeys(self, c):
442 self.key0 = self._crc32(c, self.key0)
443 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
444 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
445 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
446
447 def __call__(self, c):
448 """Decrypt a single character."""
449 c = ord(c)
450 k = self.key2 | 2
451 c = c ^ (((k * (k^1)) >> 8) & 255)
452 c = chr(c)
453 self._UpdateKeys(c)
454 return c
455
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

    Data read from the underlying file is decrypted (when a decrypter is
    given), decompressed unless the member is ZIP_STORED, and collected in
    an internal read buffer from which read(), read1(), peek() and
    readline() serve their callers.
    """

    # Max size supported by decompressor.
    # (Parenthesized: "1 << 31 - 1" would be parsed as "1 << 30".)
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj   -- object whose read() delivers the raw member data
        mode      -- open mode; a 'U' in it enables universal newlines
        zipinfo   -- provides compress_type, compress_size and filename
        decrypter -- optional callable that decrypts one character
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        In universal newline mode every line ending is returned as '\\n'
        and the kinds of line endings seen are collected in self.newlines.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() needed several steps and the buffer no longer
                # holds the start of the chunk: put the chunk back in front
                # of the unread data instead of moving the offset below 0.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """This object supports reading: always True."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """

        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                # Only ask for what is still missing, so that a short
                # read1() followed by a long one cannot exceed n bytes.
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                # Stored data needs no decompression: drop what was already
                # consumed from the buffer and append the new data.
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if len(self._unconsumed) > 0 and n > len_readbuffer:
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            if len(self._unconsumed) == 0 and self._compress_left == 0:
                # All input has been handed over: collect the remaining output.
                data += self._decompressor.flush()

            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
619
Tim Petersea5962f2007-03-12 18:07:52 +0000620
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000621
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000622class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000623 """ Class with methods to open, read, write, close, list zip files.
624
Martin v. Löwis8c436412008-07-03 12:51:14 +0000625 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000626
Fred Drake3d9091e2001-03-26 15:49:24 +0000627 file: Either the path to the file, or a file-like object.
628 If it is a path, the file will be opened and closed by ZipFile.
629 mode: The mode can be either read "r", write "w" or append "a".
630 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000631 allowZip64: if True ZipFile will create files with ZIP64 extensions when
632 needed, otherwise it will raise an exception when this would
633 be necessary.
634
Fred Drake3d9091e2001-03-26 15:49:24 +0000635 """
Fred Drake484d7352000-10-02 21:14:52 +0000636
Fred Drake90eac282001-02-28 05:29:34 +0000637 fp = None # Set here since __del__ checks it
638
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file        -- a path name (str/unicode), or an already open file-like
                   object.  A path is opened (and later closed) by this
                   class; a passed-in object is never closed here.
    mode        -- "r", "w" or "a".  Appending to a path that cannot be
                   opened for update falls back to creating it ("w").
    compression -- ZIP_STORED or ZIP_DEFLATED (the latter needs zlib).
    allowZip64  -- stored and consulted by the write checks before ZIP64
                   sized members are accepted.

    Raises RuntimeError for an unsupported mode or compression method, and
    propagates BadZipfile (or any other error) raised while reading the
    archive directory.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = ''

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Nothing to append to: create the archive instead.
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # Mark the archive as modified so that close() writes the end
            # records even when no member is added; otherwise the result
            # is an empty file that is not a valid zip archive.
            self._didModify = True
        elif key == 'a':
            try:                        # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:          # file is not a zip file, just append
                self.fp.seek(0, 2)
                # The end records still have to be written on close().
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Bare except on purpose: whatever went wrong, do not leak a file
        # that this constructor opened itself.  The error is re-raised.
        fp = self.fp
        self.fp = None
        if fp is not None and not self._filePassed:
            fp.close()
        raise
697
Ezio Melotti569e61f2009-12-30 06:14:51 +0000698 def __enter__(self):
699 return self
700
701 def __exit__(self, type, value, traceback):
702 self.close()
703
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000704 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000705 """Read the directory, making sure we close the file if the format
706 is bad."""
707 try:
708 self._RealGetContents()
709 except BadZipfile:
710 if not self._filePassed:
711 self.fp.close()
712 self.fp = None
713 raise
714
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Uses the end-of-central-directory record returned by _EndRecData() to
    find the central directory, then turns every entry into a ZipInfo that
    is appended to self.filelist and indexed by name in self.NameToInfo.
    Also sets self.comment and self.start_dir.

    Raises BadZipfile when no end record is found, when an entry does not
    start with the central directory signature, or when an entry is cut
    short.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse from an in-memory copy of the directory from here on.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Without this check struct.unpack() below fails with
            # struct.error, which callers catching BadZipfile never see.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift by the size of whatever data precedes the archive.
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % (total,))
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000777
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000778
def namelist(self):
    """Return a new list with the name of every archive member, in
    directory order."""
    return [member.filename for member in self.filelist]
785
def infolist(self):
    """Return the list of ZipInfo instances describing the archive members.

    This is the archive's own list object, not a copy.
    """
    members = self.filelist
    return members
790
def printdir(self):
    """Print a table of contents for the zip file.

    Writes a header row followed by one row per member (name, modification
    time, uncompressed size) to standard output.
    """
    # NOTE(review): the padding inside the "Modified " literal may have been
    # collapsed when this listing was extracted -- confirm against the
    # canonical source before relying on the column alignment.
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
    row_format = "%-46s %s %12d"
    stamp_format = "%d-%02d-%02d %02d:%02d:%02d"
    for zinfo in self.filelist:
        date = stamp_format % zinfo.date_time[:6]
        print(row_format % (zinfo.filename, date, zinfo.file_size))
797
798 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000799 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000800 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000801 for zinfo in self.filelist:
802 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000803 # Read by chunks, to avoid an OverflowError or a
804 # MemoryError with very large embedded files.
805 f = self.open(zinfo.filename, "r")
806 while f.read(chunk_size): # Check CRC-32
807 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000808 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000809 return zinfo.filename
810
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member of that name.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored on the instance and used by open() whenever no
    explicit password is given.
    """
    default_password = pwd
    self.pwd = default_password
823
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    'name' and 'pwd' are handed to open() unchanged; the whole member is
    read in one call.
    """
    member_file = self.open(name, "r", pwd)
    return member_file.read()
827
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- a member name or a ZipInfo instance.
    mode -- "r", "U" or "rU".
    pwd  -- password for an encrypted member; falls back to the default
            set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; KeyError (from getinfo) for an unknown member name;
    BadZipfile for a damaged local file header.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        should_close = False
    else:
        zef_file = open(self.filename, 'rb')
        should_close = True

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # struct.unpack() would otherwise fail with struct.error.
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = map(zd, header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd)
    except:
        # Bare except on purpose: on any failure release the handle opened
        # above (never the caller's own file object), then re-raise.
        if should_close:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000896
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a member name or a ZipInfo object; a name is looked up
    with getinfo().  The member is placed under `path', or under the
    current working directory when `path' is None, using its full name.
    `pwd' is passed on for encrypted members.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
910
def extractall(self, path=None, members=None, pwd=None):
    """Extract every requested member by calling extract() on each.

    `path' is the directory to extract into (extract() uses the current
    working directory when it is None).  `members' is optional and must be
    a subset of the list returned by namelist(); when omitted, all members
    are extracted.  `pwd' is forwarded to each extract() call.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
922
923 def _extract_member(self, member, targetpath, pwd):
924 """Extract the ZipInfo object 'member' to a physical
925 file on the path targetpath.
926 """
927 # build the destination pathname, replacing
928 # forward slashes to platform specific separators.
Antoine Pitrou97377bf2009-05-04 21:17:17 +0000929 # Strip trailing path separator, unless it represents the root.
930 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
931 and len(os.path.splitdrive(targetpath)[1]) > 1):
Georg Brandl62416bc2008-01-07 18:47:44 +0000932 targetpath = targetpath[:-1]
933
934 # don't include leading "/" from file name if present
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000935 if member.filename[0] == '/':
Georg Brandl62416bc2008-01-07 18:47:44 +0000936 targetpath = os.path.join(targetpath, member.filename[1:])
937 else:
938 targetpath = os.path.join(targetpath, member.filename)
939
940 targetpath = os.path.normpath(targetpath)
941
942 # Create all upper directories if necessary.
943 upperdirs = os.path.dirname(targetpath)
944 if upperdirs and not os.path.exists(upperdirs):
945 os.makedirs(upperdirs)
946
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000947 if member.filename[-1] == '/':
Martin v. Löwis0b09c422009-05-24 19:30:52 +0000948 if not os.path.isdir(targetpath):
949 os.mkdir(targetpath)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +0000950 return targetpath
951
Georg Brandl112aa502008-05-20 08:25:48 +0000952 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000953 target = file(targetpath, "wb")
954 shutil.copyfileobj(source, target)
955 source.close()
956 target.close()
957
958 return targetpath
959
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000960 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000961 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000962 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000963 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000964 print "Duplicate name:", zinfo.filename
965 if self.mode not in ("w", "a"):
966 raise RuntimeError, 'write() requires mode "w" or "a"'
967 if not self.fp:
968 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000969 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000970 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
971 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000972 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000973 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
974 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000975 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000976 if zinfo.file_size > ZIP64_LIMIT:
977 if not self._allowZip64:
978 raise LargeZipFile("Filesize would require ZIP64 extensions")
979 if zinfo.header_offset > ZIP64_LIMIT:
980 if not self._allowZip64:
981 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000982
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive.

    arcname       -- member name to store; defaults to 'filename'.  Any
                     drive prefix and leading separators are removed and a
                     directory gets a trailing "/".
    compress_type -- overrides the archive's default compression.

    A directory is stored as an empty member.  For a regular file the
    local header is first written with zeroed CRC and sizes, which are
    patched in place once the data has been streamed through.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    member_name = filename if arcname is None else arcname
    member_name = os.path.normpath(os.path.splitdrive(member_name)[1])
    while member_name[0] in (os.sep, os.altsep):
        member_name = member_name[1:]
    if isdir:
        member_name += '/'

    zinfo = ZipInfo(member_name, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = checksum = 0
        zinfo.compress_size = packed_size = 0
        zinfo.file_size = raw_size = 0
        self.fp.write(zinfo.FileHeader())
        cmpr = None
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_size += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                packed_size += len(chunk)
            self.fp.write(chunk)

    if cmpr:
        tail = cmpr.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = raw_size
    zinfo.CRC = checksum
    zinfo.file_size = raw_size

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1063
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current local
    time, the archive's default compression and permission bits 0600.
    'compress_type', when not None, overrides the compression method of
    the member (mirroring the parameter of the same name on write()).

    Raises RuntimeError when the archive is closed, plus whatever
    _writecheck() raises for an unwritable archive or unusable member.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    # Nothing has been written since header_offset was recorded above, so
    # it is not queried a second time.
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1102
1103 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001104 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001105 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001106
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  For a modified
    archive the central directory and the end-of-archive record are
    written first, with ZIP64 records added when the counts or offsets
    need them.  The underlying file is closed only if this object opened
    it, and self.fp is always reset -- even when writing the records
    fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that failed to pack, then re-raise.
                    details = (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    sys.stderr.write("%s\n" % (details,))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic end record can only hold the capped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and is left alone
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Release the file even if writing the records failed, so a later
        # close() or __del__ does not retry on a half-written archive.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1216
1217
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        Three cases are handled:

        * a package directory (one containing __init__.py): the package
          and, recursively, its sub-packages are added under
          basename/<directory name>;
        * any other directory: each *.py file directly inside it is
          added at the "basename" level;
        * anything else must be a path ending in ".py" and is added as a
          single module, otherwise RuntimeError is raised.

        What gets stored is always the compiled module.pyo or module.pyc
        chosen by _get_codename(), which compiles the source if needed.
        """
        parent, name = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding %s" % (arcname,))
                    self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                fname, arcname = self._get_codename(path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without its extension.  An up-to-date
        .pyo is preferred; otherwise the .pyc is used, after compiling the
        .py when the .pyc is missing or older than the source.  A compile
        error is printed and the .pyc name is returned regardless.  For
        example, given /python/lib/string, return
        (/python/lib/string.pyc, string.pyc); 'basename', when not empty,
        is prefixed to the archive name with a "/".
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
                    os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
                import py_compile
                if self.debug:
                    print("Compiling %s" % (file_py,))
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            fname = file_pyc

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001315
1316
def main(args = None):
    """Run the command-line interface of this module.

    args is the list of command-line arguments without the program
    name; when it is None, sys.argv[1:] is used.  The first argument
    selects the action:

        -l zipfile.zip           print a listing of the archive
        -t zipfile.zip           test the archive for corrupted members
        -e zipfile.zip target    extract the archive into directory target
        -c zipfile.zip src ...   create the archive from files/directories

    For an unknown action or a wrong number of arguments the usage text
    is printed and sys.exit(1) is called.

    The archive is always closed, even when an operation raises.  When
    extracting, directory entries (names ending in "/") are created as
    directories, and members whose name would place them outside the
    target directory are skipped with a message.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_exit():
        # Single-argument print(...) behaves the same as the print
        # statement, so this works with or without print_function.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    if args[0] == '-l':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() is documented to return the name of the first
            # bad member, or None when every member checks out.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_exit()

        out = args[2]
        # Absolute target directory with a trailing separator, used as a
        # prefix to detect members that would land outside of it.
        out_root = os.path.join(os.path.abspath(out), '')
        zf = ZipFile(args[1], 'r')
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and are untrusted:
                # absolute names or ".." components must not escape out.
                abs_tgt = os.path.join(os.path.abspath(tgt), '')
                if not abs_tgt.startswith(out_root):
                    print("Skipping %r: it would be extracted outside of %r"
                          % (path, out))
                    continue

                if path.endswith('/'):
                    # Directory entry: there is no file data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when out is '' and the member has no
                # directory part; os.makedirs('') would raise.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1388
# Run the command-line interface when this file is executed as a script;
# main() reads its arguments from sys.argv[1:] when called without any.
if __name__ == "__main__":
    main()