blob: 609dea3f273d7db5866c705e38f1511a6cbb694b [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file cannot be interpreted as a supported ZIP archive."""


class LargeZipFile(Exception):
    """Raised while writing when ZIP64 extensions are needed but disabled.

    The archive being written requires the ZIP64 extensions, and the
    caller has not allowed them.
    """


# Historical alias: the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Sizes above ZIP64_LIMIT are stored through the ZIP64 extension fields.
ZIP64_LIMIT = (2 ** 31) - 1
ZIP_FILECOUNT_LIMIT = 2 ** 16
ZIP_MAX_COMMENT = (2 ** 16) - 1

# Identifiers of the compression methods this module understands; every
# other ZIP compression method is unsupported.
ZIP_STORED = 0
ZIP_DEFLATED = 8
37
Martin v. Löwis8c436412008-07-03 12:51:14 +000038# Below are some formats and associated data for reading/writing headers using
39# the struct module. The names and structures of headers/records are those used
40# in the PKWARE description of the ZIP file format:
41# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
42# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000043
# "End of central directory" record (section V.I in the format document):
# struct layout, magic number and packed size.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside the unpacked record.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)

# The two remaining slots do not exist in the on-disk structure; this
# module appends them to the unpacked list for its own convenience.
_ECD_COMMENT, _ECD_LOCATION = 8, 9
62
# "Central directory" entry (section V.F in the format document):
# struct layout, magic number and packed size.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields inside an unpacked central directory entry.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
89
# "Local file header" (section V.A in the format document):
# struct layout, magic number and packed size.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields inside an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
108
# "Zip64 end of central directory locator": struct layout, magic number
# and packed size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document): struct layout, magic number and packed size.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields inside the unpacked Zip64 record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
130
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename -- path of the file to inspect.

    Returns True when an "end of central directory" record is found,
    False when none is found or when the file cannot be opened or read
    (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when the probe itself raises;
            # previously an IOError from the probe skipped close().
            fpin.close()
    except IOError:
        return False
    return bool(endrec)         # truthy => file has correct magic number
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000142
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable file object positioned anywhere in the archive.
    offset -- position of the classic "end of central directory" record,
              relative to the end of the file (it is passed to seek() with
              whence=2).
    endrec -- list produced by _EndRecData(); updated in place.

    Returns endrec.  It is returned unchanged when the file is too short
    to hold the ZIP64 structures or when either ZIP64 signature is absent.
    Raises BadZipfile when the locator describes a multi-disk archive.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is not large enough to contain a ZIP64 locator in front
        # of the classic record, so keep the record we were given.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: unpacking would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
173
174
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin -- seekable file object opened in binary mode.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no valid record can be located, including when
    the file is shorter than the record itself."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # The file is smaller than the record: it cannot be a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)
        if endrec[_ECD_OFFSET] == 0xffffffff:
            # the value for the "offset of the start of the central directory"
            # indicates that there is a "Zip64 end of central directory"
            # structure present, so go look for it
            return _EndRecData64(fpin, -sizeEndCentDir, endrec)

        return endrec

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Signature sits too close to the end of the file to be followed
            # by a complete record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)
            if endrec[_ECD_OFFSET] == 0xffffffff:
                # There is apparently a "Zip64 end of central directory"
                # structure present, so go look for it.  "start" is relative
                # to the scanned tail, so convert it to a file position
                # before expressing it relative to the end of the file.
                return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                     endrec)
            return endrec

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000233
Fred Drake484d7352000-10-02 21:14:52 +0000234
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default attribute values.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended to the returned header and, as a side effect,
        extract_version and create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own previous value; deriving it
            # from extract_version could lower a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are encoded as ASCII when possible; otherwise they are
        encoded as UTF-8 and flag bit 0x800 is added.  Byte-string names are
        returned untouched.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record found in the extra field, if any.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record whenever they hold the
        "see ZIP64 record" marker.  Raises RuntimeError when the ZIP64
        record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop when
        # fewer bytes remain instead of failing inside struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
381 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000382
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000383
class _ZipDecrypter:
    """Decrypts data protected with the password-based ZIP encryption.

    Known plaintext attacks exist against this scheme, but being able to
    read such archives is still useful.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        The encryption scrambles its keys with the one-byte CRC32
        primitive; a table-driven implementation was found to be faster
        than going through binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Fold the single character ch into the running value crc."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed starting keys, then mixed with every password character.
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext character c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (self.key1 * 0x08088405 + 1) & mask32
        top_byte = chr((self.key1 >> 24) & 0xFF)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        plain = chr(ord(c) ^ (((k * (k ^ 1)) >> 8) & 255))
        self._UpdateKeys(plain)
        return plain
442
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

    Data flows through three string buffers: rawbuffer (bytes read from the
    archive that still have to go through the decompressor), readbuffer
    (decrypted/decompressed bytes not yet handed out by read()) and
    linebuffer (bytes fetched by readline() that are not yet returned).
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj for reading the member described by zipinfo.

        fileobj -- object whose read() supplies the stored member bytes.
        zipinfo -- supplies compress_type, compress_size and filename.
        decrypt -- optional callable applied to every raw character.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0L            # raw bytes consumed from fileobj
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        # NOTE(review): no code visible in this class sets eof to True.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''           # separator removed by last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            # presumably a negative wbits selects a raw deflate stream
            # (no zlib header) -- confirm against the zlib documentation
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of "\\r\\n" and "\\r" line endings."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        """Return self; iteration yields lines through next()."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object closed; fileobj itself is left untouched here."""
        self.closed = True

    def _checkfornewline(self):
        """Look for a line separator in linebuffer.

        Returns (position, separator length), or (-1, -1) when the buffer
        is empty or holds none of the separators in nlSeps.  Separators are
        tried in nlSeps order and the first one present wins.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        Returns '' when size is 0 or when no data is left.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                # pull data in pieces of at most 100 bytes
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        # remember the separator so a split "\r\n" can be detected next time
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return member data, at most size bytes when size is given.

        With size None everything made available by this call is returned.
        Returns '' when size is 0.
        NOTE(review): a negative size is not treated like None; it reaches
        the slicing branch at the end (readbuffer[:size]).
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            bytes = self.fileobj.read(bytesToRead)
            self.bytes_read += len(bytes)
            self.rawbuffer += bytes

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    # keep whatever the decompressor did not consume
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            bytes = self.readbuffer
            self.readbuffer = ''
        else:
            bytes = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return bytes
Tim Petersea5962f2007-03-12 18:07:52 +0000635
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000636
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000637class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000638 """ Class with methods to open, read, write, close, list zip files.
639
Martin v. Löwis8c436412008-07-03 12:51:14 +0000640 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000641
Fred Drake3d9091e2001-03-26 15:49:24 +0000642 file: Either the path to the file, or a file-like object.
643 If it is a path, the file will be opened and closed by ZipFile.
644 mode: The mode can be either read "r", write "w" or append "a".
645 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000646 allowZip64: if True ZipFile will create files with ZIP64 extensions when
647 needed, otherwise it will raise an exception when this would
648 be necessary.
649
Fred Drake3d9091e2001-03-26 15:49:24 +0000650 """
Fred Drake484d7352000-10-02 21:14:52 +0000651
Fred Drake90eac282001-02-28 05:29:34 +0000652 fp = None # Set here since __del__ checks it
653
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000654 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000655 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000656 if mode not in ("r", "w", "a"):
657 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
658
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000659 if compression == ZIP_STORED:
660 pass
661 elif compression == ZIP_DEFLATED:
662 if not zlib:
663 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000664 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000665 else:
666 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000667
668 self._allowZip64 = allowZip64
669 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000670 self.debug = 0 # Level of printing: 0 through 3
671 self.NameToInfo = {} # Find file info given name
672 self.filelist = [] # List of ZipInfo instances for archive
673 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000674 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000675 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000676 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000677
Fred Drake3d9091e2001-03-26 15:49:24 +0000678 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000679 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000680 self._filePassed = 0
681 self.filename = file
682 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000683 try:
684 self.fp = open(file, modeDict[mode])
685 except IOError:
686 if mode == 'a':
687 mode = key = 'w'
688 self.fp = open(file, modeDict[mode])
689 else:
690 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000691 else:
692 self._filePassed = 1
693 self.fp = file
694 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000695
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000696 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000697 self._GetContents()
698 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000699 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000700 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000701 try: # See if file is a zip file
702 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000704 self.fp.seek(self.start_dir, 0)
705 except BadZipfile: # file is not a zip file, just append
706 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000707 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000708 if not self._filePassed:
709 self.fp.close()
710 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000711 raise RuntimeError, 'Mode must be "r", "w" or "a"'
712
713 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000714 """Read the directory, making sure we close the file if the format
715 is bad."""
716 try:
717 self._RealGetContents()
718 except BadZipfile:
719 if not self._filePassed:
720 self.fp.close()
721 self.fp = None
722 raise
723
724 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000725 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000726 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000727 endrec = _EndRecData(fp)
728 if not endrec:
729 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000730 if self.debug > 1:
731 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000732 size_cd = endrec[_ECD_SIZE] # bytes in central directory
733 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
734 self.comment = endrec[_ECD_COMMENT] # archive comment
735
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000736 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000737 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
738 if endrec[_ECD_LOCATION] > ZIP64_LIMIT:
739 # If the offset of the "End of Central Dir" record requires Zip64
740 # extension structures, account for them
741 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
742
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000744 inferred = concat + offset_cd
745 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000746 # self.start_dir: Position of start of central directory
747 self.start_dir = offset_cd + concat
748 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000749 data = fp.read(size_cd)
750 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000751 total = 0
752 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000753 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000754 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000755 raise BadZipfile, "Bad magic number for central directory"
756 centdir = struct.unpack(structCentralDir, centdir)
757 if self.debug > 2:
758 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000759 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000760 # Create ZipInfo instance to store file information
761 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000762 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
763 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000764 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000765 (x.create_version, x.create_system, x.extract_version, x.reserved,
766 x.flag_bits, x.compress_type, t, d,
767 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
768 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
769 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000770 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000771 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000772 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000773
774 x._decodeExtra()
775 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000776 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000777 self.filelist.append(x)
778 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000779
780 # update total bytes read from central directory
781 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
782 + centdir[_CD_EXTRA_FIELD_LENGTH]
783 + centdir[_CD_COMMENT_LENGTH])
784
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000785 if self.debug > 2:
786 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000787
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000788
def namelist(self):
    """Return the member file names as a new list, in self.filelist order."""
    return [entry.filename for entry in self.filelist]
795
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The list returned is self.filelist itself, not a copy.
    """
    entries = self.filelist
    return entries
800
def printdir(self):
    """Print a header line followed by name, timestamp and size per member."""
    row_format = "%-46s %s %12d"
    stamp_format = "%d-%02d-%02d %02d:%02d:%02d"
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
    for entry in self.filelist:
        # date_time holds (year, month, day, hour, minute, second)
        stamp = stamp_format % entry.date_time[:6]
        print(row_format % (entry.filename, stamp, entry.file_size))
807
808 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000809 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000810 for zinfo in self.filelist:
811 try:
Tim Peterse1190062001-01-15 03:34:38 +0000812 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000813 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000814 return zinfo.filename
815
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000824
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    open() falls back to this value when it is called without a password.
    """
    self.pwd = pwd
828
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' (as a string).

    'pwd' is passed through to open() for encrypted members.
    """
    member_file = self.open(name, "r", pwd)
    return member_file.read()
832
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance.  'mode' must be "r",
    "U" or "rU"; any mode containing "U" enables universal newlines on the
    returned ZipExtFile.  'pwd' overrides the default password set with
    setpassword() for encrypted members.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password, and BadZipfile when the local file
    header is damaged or disagrees with the central directory.  If this
    method opened its own file handle, that handle is closed again before
    any exception propagates.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    handed_over = False
    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Report a short header as a bad archive, not as struct.error.
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                      "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = map(zd, enc_header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        handed_over = True
        return zef
    finally:
        # On failure nobody else holds the handle we opened; release it.
        if not handed_over and not self._filePassed:
            zef_file.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000910
def extract(self, member, path=None, pwd=None):
    """Extract one member below `path' and return the created path.

    `member' may be a filename or a ZipInfo object; a filename is looked
    up with getinfo().  The member is stored under its full name.  When
    `path' is None the current working directory is used.  `pwd' is
    passed on for encrypted members.
    """
    if isinstance(member, ZipInfo):
        zinfo = member
    else:
        zinfo = self.getinfo(member)

    if path is None:
        destination = os.getcwd()
    else:
        destination = path

    return self._extract_member(zinfo, destination, pwd)
924
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    `members' defaults to everything returned by namelist(); when given
    it must be a subset of that list.  `path' and `pwd' are forwarded
    unchanged to extract().
    """
    selected = members
    if selected is None:
        selected = self.namelist()

    for entry in selected:
        self.extract(entry, path, pwd)
936
937 def _extract_member(self, member, targetpath, pwd):
938 """Extract the ZipInfo object 'member' to a physical
939 file on the path targetpath.
940 """
941 # build the destination pathname, replacing
942 # forward slashes to platform specific separators.
943 if targetpath[-1:] == "/":
944 targetpath = targetpath[:-1]
945
946 # don't include leading "/" from file name if present
947 if os.path.isabs(member.filename):
948 targetpath = os.path.join(targetpath, member.filename[1:])
949 else:
950 targetpath = os.path.join(targetpath, member.filename)
951
952 targetpath = os.path.normpath(targetpath)
953
954 # Create all upper directories if necessary.
955 upperdirs = os.path.dirname(targetpath)
956 if upperdirs and not os.path.exists(upperdirs):
957 os.makedirs(upperdirs)
958
Georg Brandl112aa502008-05-20 08:25:48 +0000959 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000960 target = file(targetpath, "wb")
961 shutil.copyfileobj(source, target)
962 source.close()
963 target.close()
964
965 return targetpath
966
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000967 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000968 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000969 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000970 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000971 print "Duplicate name:", zinfo.filename
972 if self.mode not in ("w", "a"):
973 raise RuntimeError, 'write() requires mode "w" or "a"'
974 if not self.fp:
975 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000976 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000977 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
978 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000979 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000980 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
981 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000982 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000983 if zinfo.file_size > ZIP64_LIMIT:
984 if not self._allowZip64:
985 raise LargeZipFile("Filesize would require ZIP64 extensions")
986 if zinfo.header_offset > ZIP64_LIMIT:
987 if not self._allowZip64:
988 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000989
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; its drive and leading path
    separators are removed.  'compress_type' overrides the archive-wide
    self.compression for this member.  The local header is first written
    with zero CRC and sizes, the data is streamed in 8 KiB blocks, and
    the header fields are then patched in place.

    Raises RuntimeError if the archive is already closed; _writecheck()
    may raise RuntimeError or LargeZipFile.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Do not leak the input handle if reading or writing fails.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    # Offset 14 is where the CRC field sits in the local file header.
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1058
Just van Rossumb083cb32002-12-12 12:23:32 +00001059 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +00001060 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001061 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1062 the name of the file in the archive."""
1063 if not isinstance(zinfo_or_arcname, ZipInfo):
1064 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001065 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001066 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001067 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001068 else:
1069 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001070
1071 if not self.fp:
1072 raise RuntimeError(
1073 "Attempt to write to ZIP archive that was already closed")
1074
Tim Peterse1190062001-01-15 03:34:38 +00001075 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001076 zinfo.header_offset = self.fp.tell() # Start of header bytes
1077 self._writecheck(zinfo)
1078 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001079 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001080 if zinfo.compress_type == ZIP_DEFLATED:
1081 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1082 zlib.DEFLATED, -15)
1083 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001084 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001085 else:
1086 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001087 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001088 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001089 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001090 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001091 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001092 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001093 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001094 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001095 self.filelist.append(zinfo)
1096 self.NameToInfo[zinfo.filename] = zinfo
1097
1098 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001099 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001100 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001101
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  For a modified
    archive the central directory, the ZIP64 end records (when the
    directory starts beyond ZIP64_LIMIT) and the end-of-archive record
    with the archive comment are written first.  An over-long comment is
    truncated to ZIP_MAX_COMMENT bytes.

    self.fp is always cleared, and a file opened by this instance is
    always closed, even when writing the ending records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the 32-bit fields go in a ZIP64 extra
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                     stringCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset)
                except DeprecationWarning:
                    # Dump the offending values before re-raising.
                    sys.stderr.write("%s\n" % ((structCentralDir,
                     stringCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(zinfo.filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirOffset = pos1
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirOffset = 0xFFFFFFFF

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, count % ZIP_FILECOUNT_LIMIT,
                                 count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
                                 centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Detach the file first so a failing close() cannot leave the
        # archive looking open (close() is also called from __del__).
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1205
1206
1207class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001208 """Class to create ZIP archives with Python library files and packages."""
1209
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001210 def writepy(self, pathname, basename = ""):
1211 """Add all files from "pathname" to the ZIP archive.
1212
Fred Drake484d7352000-10-02 21:14:52 +00001213 If pathname is a package directory, search the directory and
1214 all package subdirectories recursively for all *.py and enter
1215 the modules into the archive. If pathname is a plain
1216 directory, listdir *.py and enter all modules. Else, pathname
1217 must be a Python *.py file and the module will be put into the
1218 archive. Added modules are always module.pyo or module.pyc.
1219 This method will compile the module.py into module.pyc if
1220 necessary.
1221 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001222 dir, name = os.path.split(pathname)
1223 if os.path.isdir(pathname):
1224 initname = os.path.join(pathname, "__init__.py")
1225 if os.path.isfile(initname):
1226 # This is a package directory, add it
1227 if basename:
1228 basename = "%s/%s" % (basename, name)
1229 else:
1230 basename = name
1231 if self.debug:
1232 print "Adding package in", pathname, "as", basename
1233 fname, arcname = self._get_codename(initname[0:-3], basename)
1234 if self.debug:
1235 print "Adding", arcname
1236 self.write(fname, arcname)
1237 dirlist = os.listdir(pathname)
1238 dirlist.remove("__init__.py")
1239 # Add all *.py files and package subdirectories
1240 for filename in dirlist:
1241 path = os.path.join(pathname, filename)
1242 root, ext = os.path.splitext(filename)
1243 if os.path.isdir(path):
1244 if os.path.isfile(os.path.join(path, "__init__.py")):
1245 # This is a package directory, add it
1246 self.writepy(path, basename) # Recursive call
1247 elif ext == ".py":
1248 fname, arcname = self._get_codename(path[0:-3],
1249 basename)
1250 if self.debug:
1251 print "Adding", arcname
1252 self.write(fname, arcname)
1253 else:
1254 # This is NOT a package directory, add its files at top level
1255 if self.debug:
1256 print "Adding files from directory", pathname
1257 for filename in os.listdir(pathname):
1258 path = os.path.join(pathname, filename)
1259 root, ext = os.path.splitext(filename)
1260 if ext == ".py":
1261 fname, arcname = self._get_codename(path[0:-3],
1262 basename)
1263 if self.debug:
1264 print "Adding", arcname
1265 self.write(fname, arcname)
1266 else:
1267 if pathname[-3:] != ".py":
1268 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001269 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001270 fname, arcname = self._get_codename(pathname[0:-3], basename)
1271 if self.debug:
1272 print "Adding file", arcname
1273 self.write(fname, arcname)
1274
1275 def _get_codename(self, pathname, basename):
1276 """Return (filename, archivename) for the path.
1277
Fred Drake484d7352000-10-02 21:14:52 +00001278 Given a module name path, return the correct file path and
1279 archive name, compiling if necessary. For example, given
1280 /python/lib/string, return (/python/lib/string.pyc, string).
1281 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001282 file_py = pathname + ".py"
1283 file_pyc = pathname + ".pyc"
1284 file_pyo = pathname + ".pyo"
1285 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001286 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001287 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001288 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001289 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001290 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001291 if self.debug:
1292 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001293 try:
1294 py_compile.compile(file_py, file_pyc, None, True)
1295 except py_compile.PyCompileError,err:
1296 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001297 fname = file_pyc
1298 else:
1299 fname = file_pyc
1300 archivename = os.path.split(fname)[1]
1301 if basename:
1302 archivename = "%s/%s" % (basename, archivename)
1303 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001304
1305
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list and defaults to sys.argv[1:].  The first
    argument selects the action (-l, -t, -e or -c).  On a missing or
    unknown option, or a wrong argument count, the usage text is printed
    and sys.exit(1) is called.  Every archive opened here is closed again,
    also when the action fails.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the first bad member name, or None.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' for a bare file name in the current directory
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; skip anything else.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1378
# Script entry point: run the command-line interface defined by main().
if __name__ == "__main__":
    main()