blob: 89b4166f388fe77053b073144aa67e7dce338aa4 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Error raised by this module for archives it cannot read as ZIP files."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000019
20
class LargeZipFile(Exception):
    """Signal that a zipfile being written needs the ZIP64 extensions
    while those extensions are disabled.
    """
26
Tim Peterse1190062001-01-15 03:34:38 +000027error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Sizes above this value make ZipInfo.FileHeader() switch to the ZIP64
# extra field instead of the plain 4-byte header fields.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry count at which ZIP64 becomes necessary;
# the code that consults it is not in this part of the file - confirm there.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value that fits in a 16-bit field (the archive comment length in
# the "end of central directory" record is stored as an unsigned 16-bit value).
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
36# Other ZIP compression methods not supported
37
Martin v. Löwis8c436412008-07-03 12:51:14 +000038# Below are some formats and associated data for reading/writing headers using
39# the struct module. The names and structures of headers/records are those used
40# in the PKWARE description of the ZIP file format:
41# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
42# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000043
Martin v. Löwis8c436412008-07-03 12:51:14 +000044# The "end of central directory" structure, magic number, size, and indices
45# (section V.I in the format document)
# struct layout (little-endian): 4-byte signature, four unsigned 16-bit
# values, two unsigned 32-bit values, one unsigned 16-bit value.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields within an "end of central directory" list
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
62
63# The "central directory" structure, magic number, size, and indices
64# of entries in the structure (section V.F in the format document)
# struct layout (little-endian): 4-byte signature, four unsigned bytes, four
# unsigned 16-bit values, three unsigned 32-bit values, five unsigned 16-bit
# values, two unsigned 32-bit values - nineteen fields in total.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
89
Martin v. Löwis8c436412008-07-03 12:51:14 +000090# The "local file header" structure, magic number, size, and indices
91# (section V.A in the format document)
# struct layout (little-endian): 4-byte signature, two unsigned bytes, four
# unsigned 16-bit values, three unsigned 32-bit values, two unsigned 16-bit
# values - twelve fields in total.  ZipInfo.FileHeader() packs with it.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the unpacked fields within a local file header tuple
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
108
Martin v. Löwis8c436412008-07-03 12:51:14 +0000109# The "Zip64 end of central directory locator" structure, magic number, and size
# Locator layout (little-endian): 4-byte signature, unsigned 32-bit value,
# unsigned 64-bit value, unsigned 32-bit value.  _EndRecData64() unpacks these
# as (signature, disk number, relative offset, number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Record layout (little-endian): 4-byte signature, one unsigned 64-bit value,
# two unsigned 16-bit values, two unsigned 32-bit values, four unsigned
# 64-bit values - ten fields in total.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the unpacked fields within a ZIP64 end-of-archive record
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
130
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in fp.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    # A non-empty record means the file has the correct magic number.
    return bool(end_record)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000138
def is_zipfile(filename):
    """Report whether *filename* looks like a ZIP archive.

    The check is based on the magic number only.  *filename* may be a path
    or an object with a read() method; paths are opened in binary mode and
    closed again.  Returns False when the file cannot be opened or read.
    """
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as handle:
                return _check_zipfile(handle)
        # Already a file-like object: inspect it directly.
        return _check_zipfile(fp=filename)
    except IOError:
        return False
154
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable file object for the archive
    offset -- position of the classic "end of central directory" record,
              given relative to the end of the file (it is used with
              whence=2)
    endrec -- list describing the classic record; updated in place

    Returns endrec.  It is returned unchanged when the file is too small to
    hold the ZIP64 structures or when their signatures do not match.

    Raises BadZipfile if the locator describes a multi-disk archive.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator, so keep the record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no complete locator in front of the record.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Short read: unpacking would fail, so leave endrec untouched.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
186
187
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete record with a consistent comment
    length can be found.  fpin must be seekable; its position is changed.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test guards against file-like objects that clamp the seek
    # instead of failing, which would hand struct.unpack a short buffer.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: not a usable ZIP end record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000244
Fred Drake484d7352000-10-02 21:14:52 +0000245
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* stamped with *date_time*.

        filename  -- member name; cut at the first null byte and normalized
                     to forward slashes
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field is appended
        to the returned header and extract_version / create_version are
        raised to at least 45 on this object.
        """
        dt = self.date_time
        # Pack the date and time into the 16-bit MS-DOS representations.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise the creator version from its own value so that a higher
            # create_version is never lowered to the extract version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are written as ASCII when possible; otherwise they
        are encoded as UTF-8 and flag bit 0x800 is added to the result.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in the extra field, if any.

        Size and offset attributes holding the 0xffffffff placeholder are
        replaced by the 64-bit values stored in the record.  Trailing data
        too short to hold a 4-byte record header is ignored.

        Raises RuntimeError if a ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop as
        # soon as fewer bytes remain instead of letting unpack() fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000393
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000394
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000395class _ZipDecrypter:
396 """Class to handle decryption of files stored within a ZIP archive.
397
398 ZIP supports a password-based form of encryption. Even though known
399 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000400 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000401
402 Usage:
403 zd = _ZipDecrypter(mypwd)
404 plain_char = zd(cypher_char)
405 plain_text = map(zd, cypher_text)
406 """
407
408 def _GenerateCRCTable():
409 """Generate a CRC-32 table.
410
411 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
412 internal keys. We noticed that a direct implementation is faster than
413 relying on binascii.crc32().
414 """
415 poly = 0xedb88320
416 table = [0] * 256
417 for i in range(256):
418 crc = i
419 for j in range(8):
420 if crc & 1:
421 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
422 else:
423 crc = ((crc >> 1) & 0x7FFFFFFF)
424 table[i] = crc
425 return table
426 crctable = _GenerateCRCTable()
427
428 def _crc32(self, ch, crc):
429 """Compute the CRC32 primitive on one byte."""
430 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
431
432 def __init__(self, pwd):
433 self.key0 = 305419896
434 self.key1 = 591751049
435 self.key2 = 878082192
436 for p in pwd:
437 self._UpdateKeys(p)
438
439 def _UpdateKeys(self, c):
440 self.key0 = self._crc32(c, self.key0)
441 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
442 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
443 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
444
445 def __call__(self, c):
446 """Decrypt a single character."""
447 c = ord(c)
448 k = self.key2 | 2
449 c = c ^ (((k * (k^1)) >> 8) & 255)
450 c = chr(c)
451 self._UpdateKeys(c)
452 return c
453
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap *fileobj* for reading the member described by *zipinfo*.

        fileobj -- file object the raw member bytes are read from
        zipinfo -- object providing compress_type, compress_size and filename
        decrypt -- optional callable applied to each raw character read
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''         # raw data not yet decompressed
        self.readbuffer = ''        # decoded data not yet returned by read()
        self.linebuffer = ''        # data held back by readline()
        # NOTE(review): nothing in this class sets eof to True, so the
        # decompressor flush in read() only runs if a caller sets it.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''       # separator removed by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            # A negative window size asks zlib for a raw deflate stream.
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        """Return self; iteration yields lines via next()."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed (the underlying file is not touched)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (position, length) of the first separator in linebuffer.

        Both values are -1 when no separator is found.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to *size* characters of member data.

        With size None or negative, everything that is available is
        returned; with size 0 an empty string is returned.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # A negative size means "no limit", as for file objects.  Without
        # this, the final slice below would drop the last character.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            bytes = self.fileobj.read(bytesToRead)
            self.bytes_read += len(bytes)
            self.rawbuffer += bytes

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            bytes = self.readbuffer
            self.readbuffer = ''
        else:
            bytes = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return bytes
Tim Petersea5962f2007-03-12 18:07:52 +0000646
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000647
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000648class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000649 """ Class with methods to open, read, write, close, list zip files.
650
Martin v. Löwis8c436412008-07-03 12:51:14 +0000651 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000652
Fred Drake3d9091e2001-03-26 15:49:24 +0000653 file: Either the path to the file, or a file-like object.
654 If it is a path, the file will be opened and closed by ZipFile.
655 mode: The mode can be either read "r", write "w" or append "a".
656 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000657 allowZip64: if True ZipFile will create files with ZIP64 extensions when
658 needed, otherwise it will raise an exception when this would
659 be necessary.
660
Fred Drake3d9091e2001-03-26 15:49:24 +0000661 """
Fred Drake484d7352000-10-02 21:14:52 +0000662
Fred Drake90eac282001-02-28 05:29:34 +0000663 fp = None # Set here since __del__ checks it
664
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000665 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000666 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000667 if mode not in ("r", "w", "a"):
668 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
669
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000670 if compression == ZIP_STORED:
671 pass
672 elif compression == ZIP_DEFLATED:
673 if not zlib:
674 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000675 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000676 else:
677 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000678
679 self._allowZip64 = allowZip64
680 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000681 self.debug = 0 # Level of printing: 0 through 3
682 self.NameToInfo = {} # Find file info given name
683 self.filelist = [] # List of ZipInfo instances for archive
684 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000685 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000686 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000687 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000688
Fred Drake3d9091e2001-03-26 15:49:24 +0000689 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000690 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000691 self._filePassed = 0
692 self.filename = file
693 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000694 try:
695 self.fp = open(file, modeDict[mode])
696 except IOError:
697 if mode == 'a':
698 mode = key = 'w'
699 self.fp = open(file, modeDict[mode])
700 else:
701 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000702 else:
703 self._filePassed = 1
704 self.fp = file
705 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000706
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000707 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000708 self._GetContents()
709 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000710 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000711 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000712 try: # See if file is a zip file
713 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000714 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000715 self.fp.seek(self.start_dir, 0)
716 except BadZipfile: # file is not a zip file, just append
717 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000718 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000719 if not self._filePassed:
720 self.fp.close()
721 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722 raise RuntimeError, 'Mode must be "r", "w" or "a"'
723
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
726
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the ``with`` body keeps propagating.
    """
    self.close()
729
def _GetContents(self):
    """Load the table of contents via _RealGetContents().

    When that raises BadZipfile and the underlying file was opened by
    this object (no file object was passed in), the file is closed and
    self.fp is reset to None before the exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
740
741 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000742 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000744 endrec = _EndRecData(fp)
745 if not endrec:
746 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000747 if self.debug > 1:
748 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000749 size_cd = endrec[_ECD_SIZE] # bytes in central directory
750 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
751 self.comment = endrec[_ECD_COMMENT] # archive comment
752
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000753 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000754 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000755 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
756 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000757 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
758
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000759 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000760 inferred = concat + offset_cd
761 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000762 # self.start_dir: Position of start of central directory
763 self.start_dir = offset_cd + concat
764 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000765 data = fp.read(size_cd)
766 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000767 total = 0
768 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000769 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000770 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000771 raise BadZipfile, "Bad magic number for central directory"
772 centdir = struct.unpack(structCentralDir, centdir)
773 if self.debug > 2:
774 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000775 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000776 # Create ZipInfo instance to store file information
777 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000778 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
779 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000780 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000781 (x.create_version, x.create_system, x.extract_version, x.reserved,
782 x.flag_bits, x.compress_type, t, d,
783 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
784 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
785 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000786 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000787 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000788 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000789
790 x._decodeExtra()
791 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000792 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000793 self.filelist.append(x)
794 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000795
796 # update total bytes read from central directory
797 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
798 + centdir[_CD_EXTRA_FIELD_LENGTH]
799 + centdir[_CD_COMMENT_LENGTH])
800
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000801 if self.debug > 2:
802 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000803
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000804
def namelist(self):
    """Return a new list with the file name of every archive member,
    in the order the members appear in self.filelist."""
    return [entry.filename for entry in self.filelist]
811
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    This is the archive's own self.filelist object, not a copy.
    """
    return self.filelist
816
817 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000818 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
820 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000821 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000822 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
823
824 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000825 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000826 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000827 for zinfo in self.filelist:
828 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000829 # Read by chunks, to avoid an OverflowError or a
830 # MemoryError with very large embedded files.
831 f = self.open(zinfo.filename, "r")
832 while f.read(chunk_size): # Check CRC-32
833 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000834 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000835 return zinfo.filename
836
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member called 'name'.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000845
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is kept in self.pwd, which open() falls back to when it
    is called without a password.
    """
    self.pwd = pwd
849
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened in mode "r" with the given password and read
    in one call.
    """
    member = self.open(name, "r", pwd)
    return member.read()
853
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo object.  'mode' must be "r",
    "U" or "rU"; a mode containing "U" switches universal newlines on
    for the returned ZipExtFile.  'pwd' is the password for an
    encrypted member and falls back to self.pwd when not given.

    Raises RuntimeError for an unsupported mode, a closed archive, or a
    missing or wrong password, and BadZipfile when the local file
    header is malformed, truncated, or names a different file than the
    central directory.  If this call opened its own file handle, that
    handle is closed again before any exception propagates.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Avoid leaking struct.error for a header cut short.
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = map(zd, enc_header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
    except BaseException:
        # Only a handle created by this call is ours to release; a file
        # object supplied by the caller must stay open.
        if not self._filePassed:
            zef_file.close()
        raise
    return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000931
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns whatever _extract_member() returns for the member.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
945
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().

       Each selected member is handed to extract() in turn.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
957
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

       The member name comes from the archive and is therefore treated
       as untrusted: any drive prefix, leading separators and "." / ".."
       components are dropped so the result always lies below
       'targetpath'.  Missing parent directories are created.  A member
       whose name ends in "/" is created as a directory.

       Returns the normalised path that was written or created.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with
    # platform specific separators and discarding every component that
    # could move the result outside of targetpath.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        part for part in arcname.split(os.path.sep)
        if part not in ('', os.path.curdir, os.path.pardir))

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # Slice instead of index so an empty member name cannot raise.
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        # Close the member reader even if the copy fails part-way.
        source.close()

    return targetpath
994
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000995 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000996 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000997 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000998 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999 print "Duplicate name:", zinfo.filename
1000 if self.mode not in ("w", "a"):
1001 raise RuntimeError, 'write() requires mode "w" or "a"'
1002 if not self.fp:
1003 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001004 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001005 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1006 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001007 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001008 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1009 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001010 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001011 if zinfo.file_size > ZIP64_LIMIT:
1012 if not self._allowZip64:
1013 raise LargeZipFile("Filesize would require ZIP64 extensions")
1014 if zinfo.header_offset > ZIP64_LIMIT:
1015 if not self._allowZip64:
1016 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001017
1018 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001019 """Put the bytes from filename into the archive under the name
1020 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001021 if not self.fp:
1022 raise RuntimeError(
1023 "Attempt to write to ZIP archive that was already closed")
1024
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001025 st = os.stat(filename)
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001026 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001027 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001028 date_time = mtime[0:6]
1029 # Create ZipInfo instance to store file information
1030 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001031 arcname = filename
1032 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1033 while arcname[0] in (os.sep, os.altsep):
1034 arcname = arcname[1:]
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001035 if isdir:
1036 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001037 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001038 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001039 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001040 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001041 else:
Tim Peterse1190062001-01-15 03:34:38 +00001042 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001043
1044 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001045 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001046 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001047
1048 self._writecheck(zinfo)
1049 self._didModify = True
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00001050
1051 if isdir:
1052 zinfo.file_size = 0
1053 zinfo.compress_size = 0
1054 zinfo.CRC = 0
1055 self.filelist.append(zinfo)
1056 self.NameToInfo[zinfo.filename] = zinfo
1057 self.fp.write(zinfo.FileHeader())
1058 return
1059
Benjamin Petersonb91e8ed2009-05-10 02:29:00 +00001060 with open(filename, "rb") as fp:
1061 # Must overwrite CRC and sizes with correct data later
1062 zinfo.CRC = CRC = 0
1063 zinfo.compress_size = compress_size = 0
1064 zinfo.file_size = file_size = 0
1065 self.fp.write(zinfo.FileHeader())
1066 if zinfo.compress_type == ZIP_DEFLATED:
1067 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1068 zlib.DEFLATED, -15)
1069 else:
1070 cmpr = None
1071 while 1:
1072 buf = fp.read(1024 * 8)
1073 if not buf:
1074 break
1075 file_size = file_size + len(buf)
1076 CRC = crc32(buf, CRC) & 0xffffffff
1077 if cmpr:
1078 buf = cmpr.compress(buf)
1079 compress_size = compress_size + len(buf)
1080 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001081 if cmpr:
1082 buf = cmpr.flush()
1083 compress_size = compress_size + len(buf)
1084 self.fp.write(buf)
1085 zinfo.compress_size = compress_size
1086 else:
1087 zinfo.compress_size = file_size
1088 zinfo.CRC = CRC
1089 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001090 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001091 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001092 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001093 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001094 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001095 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001096 self.filelist.append(zinfo)
1097 self.NameToInfo[zinfo.filename] = zinfo
1098
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time, the archive's default compression and permission bits
    0600.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    payload = bytes
    zinfo.file_size = len(payload)              # Uncompressed size
    zinfo.header_offset = self.fp.tell()        # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(payload) & 0xffffffff     # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        payload = co.compress(payload) + co.flush()
        zinfo.compress_size = len(payload)      # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    # The position is sampled again right before the header goes out.
    zinfo.header_offset = self.fp.tell()        # Start of header bytes
    self.fp.write(zinfo.FileHeader())
    self.fp.write(payload)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        trailer = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size)
        self.fp.write(trailer)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1137
def __del__(self):
    """Call the "close()" method in case the user forgot.

    close() returns early when the archive is already closed, so an
    explicit close followed by finalisation is harmless.
    """
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001141
1142 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001143 """Close the file, and for mode "w" and "a" write the ending
1144 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001145 if self.fp is None:
1146 return
Tim Petersa608bb22006-06-15 18:06:29 +00001147
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001148 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001149 count = 0
1150 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001151 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001152 count = count + 1
1153 dt = zinfo.date_time
1154 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001155 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001156 extra = []
1157 if zinfo.file_size > ZIP64_LIMIT \
1158 or zinfo.compress_size > ZIP64_LIMIT:
1159 extra.append(zinfo.file_size)
1160 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001161 file_size = 0xffffffff
1162 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001163 else:
1164 file_size = zinfo.file_size
1165 compress_size = zinfo.compress_size
1166
1167 if zinfo.header_offset > ZIP64_LIMIT:
1168 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001169 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001170 else:
1171 header_offset = zinfo.header_offset
1172
1173 extra_data = zinfo.extra
1174 if extra:
1175 # Append a ZIP64 field to the extra's
1176 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001177 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001178 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001179
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001180 extract_version = max(45, zinfo.extract_version)
1181 create_version = max(45, zinfo.create_version)
1182 else:
1183 extract_version = zinfo.extract_version
1184 create_version = zinfo.create_version
1185
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001186 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001187 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001188 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001189 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001190 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001191 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001192 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001193 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001194 0, zinfo.internal_attr, zinfo.external_attr,
1195 header_offset)
1196 except DeprecationWarning:
1197 print >>sys.stderr, (structCentralDir,
1198 stringCentralDir, create_version,
1199 zinfo.create_system, extract_version, zinfo.reserved,
1200 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1201 zinfo.CRC, compress_size, file_size,
1202 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1203 0, zinfo.internal_attr, zinfo.external_attr,
1204 header_offset)
1205 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001206 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001207 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001208 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001209 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001210
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001211 pos2 = self.fp.tell()
1212 # Write end-of-zip-archive record
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001213 centDirCount = count
1214 centDirSize = pos2 - pos1
Martin v. Löwis8c436412008-07-03 12:51:14 +00001215 centDirOffset = pos1
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001216 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1217 centDirOffset > ZIP64_LIMIT or
1218 centDirSize > ZIP64_LIMIT):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001219 # Need to write the ZIP64 end-of-archive records
1220 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001221 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001222 44, 45, 45, 0, 0, centDirCount, centDirCount,
1223 centDirSize, centDirOffset)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001224 self.fp.write(zip64endrec)
1225
1226 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001227 structEndArchive64Locator,
1228 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001229 self.fp.write(zip64locrec)
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001230 centDirCount = min(centDirCount, 0xFFFF)
1231 centDirSize = min(centDirSize, 0xFFFFFFFF)
1232 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001233
Martin v. Löwis8c436412008-07-03 12:51:14 +00001234 # check for valid comment length
1235 if len(self.comment) >= ZIP_MAX_COMMENT:
1236 if self.debug > 0:
1237 msg = 'Archive comment is too long; truncating to %d bytes' \
1238 % ZIP_MAX_COMMENT
1239 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001240
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001241 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arcd25f87a2009-01-17 16:40:17 +00001242 0, 0, centDirCount, centDirCount,
1243 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwis8c436412008-07-03 12:51:14 +00001244 self.fp.write(endrec)
1245 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001246 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001247
Fred Drake3d9091e2001-03-26 15:49:24 +00001248 if not self._filePassed:
1249 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001250 self.fp = None
1251
1252
1253class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001254 """Class to create ZIP archives with Python library files and packages."""
1255
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001256 def writepy(self, pathname, basename = ""):
1257 """Add all files from "pathname" to the ZIP archive.
1258
Fred Drake484d7352000-10-02 21:14:52 +00001259 If pathname is a package directory, search the directory and
1260 all package subdirectories recursively for all *.py and enter
1261 the modules into the archive. If pathname is a plain
1262 directory, listdir *.py and enter all modules. Else, pathname
1263 must be a Python *.py file and the module will be put into the
1264 archive. Added modules are always module.pyo or module.pyc.
1265 This method will compile the module.py into module.pyc if
1266 necessary.
1267 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001268 dir, name = os.path.split(pathname)
1269 if os.path.isdir(pathname):
1270 initname = os.path.join(pathname, "__init__.py")
1271 if os.path.isfile(initname):
1272 # This is a package directory, add it
1273 if basename:
1274 basename = "%s/%s" % (basename, name)
1275 else:
1276 basename = name
1277 if self.debug:
1278 print "Adding package in", pathname, "as", basename
1279 fname, arcname = self._get_codename(initname[0:-3], basename)
1280 if self.debug:
1281 print "Adding", arcname
1282 self.write(fname, arcname)
1283 dirlist = os.listdir(pathname)
1284 dirlist.remove("__init__.py")
1285 # Add all *.py files and package subdirectories
1286 for filename in dirlist:
1287 path = os.path.join(pathname, filename)
1288 root, ext = os.path.splitext(filename)
1289 if os.path.isdir(path):
1290 if os.path.isfile(os.path.join(path, "__init__.py")):
1291 # This is a package directory, add it
1292 self.writepy(path, basename) # Recursive call
1293 elif ext == ".py":
1294 fname, arcname = self._get_codename(path[0:-3],
1295 basename)
1296 if self.debug:
1297 print "Adding", arcname
1298 self.write(fname, arcname)
1299 else:
1300 # This is NOT a package directory, add its files at top level
1301 if self.debug:
1302 print "Adding files from directory", pathname
1303 for filename in os.listdir(pathname):
1304 path = os.path.join(pathname, filename)
1305 root, ext = os.path.splitext(filename)
1306 if ext == ".py":
1307 fname, arcname = self._get_codename(path[0:-3],
1308 basename)
1309 if self.debug:
1310 print "Adding", arcname
1311 self.write(fname, arcname)
1312 else:
1313 if pathname[-3:] != ".py":
1314 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001315 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001316 fname, arcname = self._get_codename(pathname[0:-3], basename)
1317 if self.debug:
1318 print "Adding file", arcname
1319 self.write(fname, arcname)
1320
1321 def _get_codename(self, pathname, basename):
1322 """Return (filename, archivename) for the path.
1323
Fred Drake484d7352000-10-02 21:14:52 +00001324 Given a module name path, return the correct file path and
1325 archive name, compiling if necessary. For example, given
1326 /python/lib/string, return (/python/lib/string.pyc, string).
1327 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001328 file_py = pathname + ".py"
1329 file_pyc = pathname + ".pyc"
1330 file_pyo = pathname + ".pyo"
1331 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001332 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001333 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001334 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001335 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001336 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001337 if self.debug:
1338 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001339 try:
1340 py_compile.compile(file_py, file_pyc, None, True)
1341 except py_compile.PyCompileError,err:
1342 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001343 fname = file_pyc
1344 else:
1345 fname = file_pyc
1346 archivename = os.path.split(fname)[1]
1347 if basename:
1348 archivename = "%s/%s" % (basename, archivename)
1349 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001350
1351
def main(args = None):
    """Run the zipfile command-line interface.

    args is the list of command-line arguments without the program
    name; when it is None, sys.argv[1:] is used.  Supported forms:

        -l zipfile.zip           print a listing of the archive
        -t zipfile.zip           test the archive's members
        -e zipfile.zip target    extract the archive into target
        -c zipfile.zip src ...   create an archive from files/directories

    Prints the usage text and calls sys.exit(1) when the arguments do
    not match one of these forms.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            # Release the archive even if listing fails.
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the name of the first bad member (or None);
        # report it instead of silently discarding the result.
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            out_root = os.path.abspath(out)
            # out_root with a trailing separator, so that a sibling such
            # as "<out>2" is not mistaken for a path inside "<out>".
            inside_prefix = os.path.join(out_root, '')
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the (untrusted) archive: never
                # write outside the requested target directory.
                resolved = os.path.abspath(tgt)
                if resolved != out_root and \
                        not resolved.startswith(inside_prefix):
                    print("Skipping %r: outside target directory" % (path,))
                    continue

                if path.endswith('/') or resolved == out_root:
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory,
            # under the archive name zippath.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1423
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()