blob: 85012d87c4b2a1c1e294990944db1fb522c284b9 [file] [log] [blame]
"""
Read and write ZIP files.
"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
# Public names of this module (what "from zipfile import *" exports).
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file cannot be handled as a ZIP archive.

    Examples in this module: the file is not a zip file at all, or the
    archive spans multiple disks.
    """
Ronald Oussoren143cefb2006-06-15 08:14:18 +000019
20
class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the zipfile requires ZIP64
    extensions and those extensions are disabled.
    """
26
error = BadZipfile      # The exception raised by this module

# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() store the real values in
# a ZIP64 extra field instead of the classic 32-bit header fields.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry count from which ZIP64 is needed; it is
# not used in the part of the module visible here -- confirm against callers.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value that fits the 16-bit comment-length field of the "end of
# central directory" record (the trailing "H" in structEndArchive).
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
37
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# struct layout (little-endian): 4-byte signature, four 16-bit fields,
# two 32-bit fields and one 16-bit field -- eight items, indexed below.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData() appends the comment text and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
62
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# struct layout (little-endian): 4-byte signature, four single bytes, four
# 16-bit fields, three 32-bit fields, five 16-bit fields and two 32-bit
# fields -- nineteen items, indexed below.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
89
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# struct layout (little-endian): 4-byte signature, two single bytes, four
# 16-bit fields, three 32-bit fields and two 16-bit fields -- twelve items,
# indexed below.  ZipInfo.FileHeader() packs records in this format.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
108
# The "Zip64 end of central directory locator" structure, magic number, and size
# struct layout (little-endian): 4-byte signature, 32-bit field, 64-bit field,
# 32-bit field; _EndRecData64() unpacks them as (sig, diskno, reloff, disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000113
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# struct layout (little-endian): 4-byte signature, one 64-bit field, two
# 16-bit fields, two 32-bit fields and four 64-bit fields -- ten items,
# indexed below.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
130
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when
    _EndRecData() finds an "end of central directory" record in it and
    False otherwise, including when the file cannot be opened or read
    (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Close the file even when reading the end record fails, so a
            # bad file does not leak an open handle.
            fpin.close()
        if endrec:
            return True                 # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000142
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the position of the classic "end of
    central directory" record relative to the end of the file (a negative
    number) and endrec is the list built from that record.

    endrec is returned unchanged when no complete, correctly signed ZIP64
    locator and record precede the classic record; otherwise its first seven
    items are overwritten with the ZIP64 values.  BadZipfile is raised for
    archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too small to hold a ZIP64 locator in front of the
        # classic record, so there is nothing to merge.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: unpacking would fail, and no locator can be present.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        # A locator without room for the record it points at: treat the
        # archive as having no usable ZIP64 data.
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
174
175
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when the file is too short to hold the record, when
    no record signature is found, when the record found is truncated, or
    when its comment length does not match the data that follows it."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # The file is shorter than the fixed-size record, so it cannot be
        # a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: the file is corrupt or not a ZIP.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000229
Fred Drake484d7352000-10-02 21:14:52 +0000230
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe one archive member.

        filename is the member name; it is kept verbatim in orig_filename
        and stored in normalized form (cut at the first null byte, os.sep
        replaced by "/") in filename.  date_time is a (year, month, day,
        hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the encoded
        file name and the extra data.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended, the 32-bit size fields are set to
        0xffffffff, and extract_version and create_version are raised to at
        least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Each version field is raised from its own previous value, so
            # a create_version above extract_version is not lowered.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and flag bit 0x800 is added to the returned flags.
        Any other name is returned unchanged with the current flag bits.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record(s) found in the extra field to this object.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of a type-1 record whenever they hold
        the "see ZIP64 record" marker.  RuntimeError is raised for a type-1
        record with an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; fewer
        # bytes than that cannot be a record and are ignored instead of
        # crashing the unpack call.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000378
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000379
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000380class _ZipDecrypter:
381 """Class to handle decryption of files stored within a ZIP archive.
382
383 ZIP supports a password-based form of encryption. Even though known
384 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000385 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000386
387 Usage:
388 zd = _ZipDecrypter(mypwd)
389 plain_char = zd(cypher_char)
390 plain_text = map(zd, cypher_text)
391 """
392
393 def _GenerateCRCTable():
394 """Generate a CRC-32 table.
395
396 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
397 internal keys. We noticed that a direct implementation is faster than
398 relying on binascii.crc32().
399 """
400 poly = 0xedb88320
401 table = [0] * 256
402 for i in range(256):
403 crc = i
404 for j in range(8):
405 if crc & 1:
406 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
407 else:
408 crc = ((crc >> 1) & 0x7FFFFFFF)
409 table[i] = crc
410 return table
411 crctable = _GenerateCRCTable()
412
413 def _crc32(self, ch, crc):
414 """Compute the CRC32 primitive on one byte."""
415 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
416
417 def __init__(self, pwd):
418 self.key0 = 305419896
419 self.key1 = 591751049
420 self.key2 = 878082192
421 for p in pwd:
422 self._UpdateKeys(p)
423
424 def _UpdateKeys(self, c):
425 self.key0 = self._crc32(c, self.key0)
426 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
427 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
428 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
429
430 def __call__(self, c):
431 """Decrypt a single character."""
432 c = ord(c)
433 k = self.key2 | 2
434 c = c ^ (((k * (k^1)) >> 8) & 255)
435 c = chr(c)
436 self._UpdateKeys(c)
437 return c
438
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj for reading the member described by zipinfo.

        zipinfo supplies compress_type, compress_size and filename.  decrypt,
        when given, is called on every character read before any
        decompression takes place.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''         # read from the file, not yet processed
        self.readbuffer = ''        # processed data not yet handed out
        self.linebuffer = ''        # data held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''       # separator removed by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Switch recognition of "\\r\\n" and "\\r" line ends on or off."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at the end."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        self.closed = True

    def _checkfornewline(self):
        """Locate the first line separator in the line buffer.

        Returns (position, separator length), or (-1, -1) when the buffer
        holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]
            # Only the character right after the discarded separator can be
            # its second half, so forget the separator once that character
            # has been looked at; otherwise later "\n"s would be eaten too.
            self.lastdiscard = ''

            # Take the separator that occurs earliest in the buffer.  On a
            # tie the one listed first in nlSeps wins, which keeps "\r\n"
            # in one piece.
            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        Complete lines are returned with "\\n" as terminator, whatever the
        separator in the data was.  A line that is cut short, by size or by
        the end of the data, is returned without a terminator.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl > size:
            # The buffered line is longer than the caller asked for: hand
            # back the first size characters as they are and keep the rest,
            # separator included, for the next call.
            partial = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return partial

        if nl < 0:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size characters of member data, or, when size is
        None, whatever one pass over the remaining raw data yields.

        An empty string is returned for size 0 and when no data is left.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    # NOTE(review): nothing in this class sets self.eof to
                    # True, so this flush is only reached if outside code
                    # sets it -- confirm whether that is intended.
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result
Tim Petersea5962f2007-03-12 18:07:52 +0000631
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000632
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000633class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000634 """ Class with methods to open, read, write, close, list zip files.
635
Martin v. Löwis8c436412008-07-03 12:51:14 +0000636 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000637
Fred Drake3d9091e2001-03-26 15:49:24 +0000638 file: Either the path to the file, or a file-like object.
639 If it is a path, the file will be opened and closed by ZipFile.
640 mode: The mode can be either read "r", write "w" or append "a".
641 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000642 allowZip64: if True ZipFile will create files with ZIP64 extensions when
643 needed, otherwise it will raise an exception when this would
644 be necessary.
645
Fred Drake3d9091e2001-03-26 15:49:24 +0000646 """
Fred Drake484d7352000-10-02 21:14:52 +0000647
Fred Drake90eac282001-02-28 05:29:34 +0000648 fp = None # Set here since __del__ checks it
649
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000650 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000651 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000652 if mode not in ("r", "w", "a"):
653 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
654
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000655 if compression == ZIP_STORED:
656 pass
657 elif compression == ZIP_DEFLATED:
658 if not zlib:
659 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000660 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000661 else:
662 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000663
664 self._allowZip64 = allowZip64
665 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000666 self.debug = 0 # Level of printing: 0 through 3
667 self.NameToInfo = {} # Find file info given name
668 self.filelist = [] # List of ZipInfo instances for archive
669 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000670 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000671 self.pwd = None
Martin v. Löwis8c436412008-07-03 12:51:14 +0000672 self.comment = ''
Tim Petersa19a1682001-03-29 04:36:09 +0000673
Fred Drake3d9091e2001-03-26 15:49:24 +0000674 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000675 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000676 self._filePassed = 0
677 self.filename = file
678 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000679 try:
680 self.fp = open(file, modeDict[mode])
681 except IOError:
682 if mode == 'a':
683 mode = key = 'w'
684 self.fp = open(file, modeDict[mode])
685 else:
686 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000687 else:
688 self._filePassed = 1
689 self.fp = file
690 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000691
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000692 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000693 self._GetContents()
694 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000695 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000696 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000697 try: # See if file is a zip file
698 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000699 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000700 self.fp.seek(self.start_dir, 0)
701 except BadZipfile: # file is not a zip file, just append
702 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000704 if not self._filePassed:
705 self.fp.close()
706 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000707 raise RuntimeError, 'Mode must be "r", "w" or "a"'
708
709 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000710 """Read the directory, making sure we close the file if the format
711 is bad."""
712 try:
713 self._RealGetContents()
714 except BadZipfile:
715 if not self._filePassed:
716 self.fp.close()
717 self.fp = None
718 raise
719
720 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000721 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000723 endrec = _EndRecData(fp)
724 if not endrec:
725 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000726 if self.debug > 1:
727 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000728 size_cd = endrec[_ECD_SIZE] # bytes in central directory
729 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
730 self.comment = endrec[_ECD_COMMENT] # archive comment
731
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000732 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000733 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000734 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
735 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000736 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
737
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000738 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000739 inferred = concat + offset_cd
740 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000741 # self.start_dir: Position of start of central directory
742 self.start_dir = offset_cd + concat
743 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000744 data = fp.read(size_cd)
745 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000746 total = 0
747 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000748 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000749 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000750 raise BadZipfile, "Bad magic number for central directory"
751 centdir = struct.unpack(structCentralDir, centdir)
752 if self.debug > 2:
753 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000754 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000755 # Create ZipInfo instance to store file information
756 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000757 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
758 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000759 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000760 (x.create_version, x.create_system, x.extract_version, x.reserved,
761 x.flag_bits, x.compress_type, t, d,
762 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
763 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
764 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000765 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000766 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000767 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000768
769 x._decodeExtra()
770 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000771 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000772 self.filelist.append(x)
773 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000774
775 # update total bytes read from central directory
776 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
777 + centdir[_CD_EXTRA_FIELD_LENGTH]
778 + centdir[_CD_COMMENT_LENGTH])
779
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000780 if self.debug > 2:
781 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000782
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783
784 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000785 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000786 l = []
787 for data in self.filelist:
788 l.append(data.filename)
789 return l
790
791 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000792 """Return a list of class ZipInfo instances for files in the
793 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000794 return self.filelist
795
796 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000797 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000798 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
799 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000800 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000801 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
802
803 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000804 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000805 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000806 for zinfo in self.filelist:
807 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000808 # Read by chunks, to avoid an OverflowError or a
809 # MemoryError with very large embedded files.
810 f = self.open(zinfo.filename, "r")
811 while f.read(chunk_size): # Check CRC-32
812 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000813 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000814 return zinfo.filename
815
816 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000817 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000818 info = self.NameToInfo.get(name)
819 if info is None:
820 raise KeyError(
821 'There is no item named %r in the archive' % name)
822
823 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000824
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000825 def setpassword(self, pwd):
826 """Set default password for encrypted files."""
827 self.pwd = pwd
828
829 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000830 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000831 return self.open(name, "r", pwd).read()
832
833 def open(self, name, mode="r", pwd=None):
834 """Return file-like object for 'name'."""
835 if mode not in ("r", "U", "rU"):
836 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000837 if not self.fp:
838 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000839 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000840
Tim Petersea5962f2007-03-12 18:07:52 +0000841 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000842 # given a file object in the constructor
843 if self._filePassed:
844 zef_file = self.fp
845 else:
846 zef_file = open(self.filename, 'rb')
847
Georg Brandl112aa502008-05-20 08:25:48 +0000848 # Make sure we have an info object
849 if isinstance(name, ZipInfo):
850 # 'name' is already an info object
851 zinfo = name
852 else:
853 # Get info object for name
854 zinfo = self.getinfo(name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000855
856 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000857
858 # Skip the file header:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000859 fheader = zef_file.read(sizeFileHeader)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000860 if fheader[0:4] != stringFileHeader:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000861 raise BadZipfile, "Bad magic number for file header"
862
863 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000864 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000865 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000866 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000867
868 if fname != zinfo.orig_filename:
869 raise BadZipfile, \
870 'File name in directory "%s" and header "%s" differ.' % (
871 zinfo.orig_filename, fname)
872
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000873 # check for encrypted flag & handle password
874 is_encrypted = zinfo.flag_bits & 0x1
875 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000876 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000877 if not pwd:
878 pwd = self.pwd
879 if not pwd:
880 raise RuntimeError, "File %s is encrypted, " \
881 "password required for extraction" % name
882
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000883 zd = _ZipDecrypter(pwd)
884 # The first 12 bytes in the cypher stream is an encryption header
885 # used to strengthen the algorithm. The first 11 bytes are
886 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000887 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000888 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000889 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000890 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000891 if zinfo.flag_bits & 0x8:
892 # compare against the file type from extended local headers
893 check_byte = (zinfo._raw_time >> 8) & 0xff
894 else:
895 # compare against the CRC otherwise
896 check_byte = (zinfo.CRC >> 24) & 0xff
897 if ord(h[11]) != check_byte:
898 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000899
900 # build and return a ZipExtFile
901 if zd is None:
902 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000903 else:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000904 zef = ZipExtFile(zef_file, zinfo, zd)
905
906 # set universal newlines on ZipExtFile if necessary
907 if "U" in mode:
908 zef.set_univ_newlines(True)
909 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000910
Georg Brandl62416bc2008-01-07 18:47:44 +0000911 def extract(self, member, path=None, pwd=None):
912 """Extract a member from the archive to the current working directory,
913 using its full name. Its file information is extracted as accurately
914 as possible. `member' may be a filename or a ZipInfo object. You can
915 specify a different directory using `path'.
916 """
917 if not isinstance(member, ZipInfo):
918 member = self.getinfo(member)
919
920 if path is None:
921 path = os.getcwd()
922
923 return self._extract_member(member, path, pwd)
924
925 def extractall(self, path=None, members=None, pwd=None):
926 """Extract all members from the archive to the current working
927 directory. `path' specifies a different directory to extract to.
928 `members' is optional and must be a subset of the list returned
929 by namelist().
930 """
931 if members is None:
932 members = self.namelist()
933
934 for zipinfo in members:
935 self.extract(zipinfo, path, pwd)
936
937 def _extract_member(self, member, targetpath, pwd):
938 """Extract the ZipInfo object 'member' to a physical
939 file on the path targetpath.
940 """
941 # build the destination pathname, replacing
942 # forward slashes to platform specific separators.
943 if targetpath[-1:] == "/":
944 targetpath = targetpath[:-1]
945
946 # don't include leading "/" from file name if present
947 if os.path.isabs(member.filename):
948 targetpath = os.path.join(targetpath, member.filename[1:])
949 else:
950 targetpath = os.path.join(targetpath, member.filename)
951
952 targetpath = os.path.normpath(targetpath)
953
954 # Create all upper directories if necessary.
955 upperdirs = os.path.dirname(targetpath)
956 if upperdirs and not os.path.exists(upperdirs):
957 os.makedirs(upperdirs)
958
Georg Brandl112aa502008-05-20 08:25:48 +0000959 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000960 target = file(targetpath, "wb")
961 shutil.copyfileobj(source, target)
962 source.close()
963 target.close()
964
965 return targetpath
966
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000967 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000968 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000969 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000970 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000971 print "Duplicate name:", zinfo.filename
972 if self.mode not in ("w", "a"):
973 raise RuntimeError, 'write() requires mode "w" or "a"'
974 if not self.fp:
975 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000976 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000977 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
978 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000979 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000980 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
981 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000982 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000983 if zinfo.file_size > ZIP64_LIMIT:
984 if not self._allowZip64:
985 raise LargeZipFile("Filesize would require ZIP64 extensions")
986 if zinfo.header_offset > ZIP64_LIMIT:
987 if not self._allowZip64:
988 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000989
990 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000991 """Put the bytes from filename into the archive under the name
992 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000993 if not self.fp:
994 raise RuntimeError(
995 "Attempt to write to ZIP archive that was already closed")
996
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000997 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000998 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999 date_time = mtime[0:6]
1000 # Create ZipInfo instance to store file information
1001 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001002 arcname = filename
1003 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1004 while arcname[0] in (os.sep, os.altsep):
1005 arcname = arcname[1:]
1006 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001007 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001008 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001009 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001010 else:
Tim Peterse1190062001-01-15 03:34:38 +00001011 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001012
1013 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001014 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001015 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001016
1017 self._writecheck(zinfo)
1018 self._didModify = True
1019 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +00001020 # Must overwrite CRC and sizes with correct data later
1021 zinfo.CRC = CRC = 0
1022 zinfo.compress_size = compress_size = 0
1023 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001024 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001025 if zinfo.compress_type == ZIP_DEFLATED:
1026 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1027 zlib.DEFLATED, -15)
1028 else:
1029 cmpr = None
1030 while 1:
1031 buf = fp.read(1024 * 8)
1032 if not buf:
1033 break
1034 file_size = file_size + len(buf)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001035 CRC = crc32(buf, CRC) & 0xffffffff
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001036 if cmpr:
1037 buf = cmpr.compress(buf)
1038 compress_size = compress_size + len(buf)
1039 self.fp.write(buf)
1040 fp.close()
1041 if cmpr:
1042 buf = cmpr.flush()
1043 compress_size = compress_size + len(buf)
1044 self.fp.write(buf)
1045 zinfo.compress_size = compress_size
1046 else:
1047 zinfo.compress_size = file_size
1048 zinfo.CRC = CRC
1049 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001050 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001051 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001052 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001053 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001054 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001055 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001056 self.filelist.append(zinfo)
1057 self.NameToInfo[zinfo.filename] = zinfo
1058
Just van Rossumb083cb32002-12-12 12:23:32 +00001059 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +00001060 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001061 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1062 the name of the file in the archive."""
1063 if not isinstance(zinfo_or_arcname, ZipInfo):
1064 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001065 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001066 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001067 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001068 else:
1069 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001070
1071 if not self.fp:
1072 raise RuntimeError(
1073 "Attempt to write to ZIP archive that was already closed")
1074
Tim Peterse1190062001-01-15 03:34:38 +00001075 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001076 zinfo.header_offset = self.fp.tell() # Start of header bytes
1077 self._writecheck(zinfo)
1078 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001079 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001080 if zinfo.compress_type == ZIP_DEFLATED:
1081 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1082 zlib.DEFLATED, -15)
1083 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001084 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001085 else:
1086 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001087 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001088 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001089 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001090 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001091 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001092 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001093 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001094 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001095 self.filelist.append(zinfo)
1096 self.NameToInfo[zinfo.filename] = zinfo
1097
1098 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001099 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001100 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001101
1102 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001103 """Close the file, and for mode "w" and "a" write the ending
1104 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001105 if self.fp is None:
1106 return
Tim Petersa608bb22006-06-15 18:06:29 +00001107
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001108 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001109 count = 0
1110 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001111 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001112 count = count + 1
1113 dt = zinfo.date_time
1114 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001115 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001116 extra = []
1117 if zinfo.file_size > ZIP64_LIMIT \
1118 or zinfo.compress_size > ZIP64_LIMIT:
1119 extra.append(zinfo.file_size)
1120 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001121 file_size = 0xffffffff
1122 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001123 else:
1124 file_size = zinfo.file_size
1125 compress_size = zinfo.compress_size
1126
1127 if zinfo.header_offset > ZIP64_LIMIT:
1128 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001129 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001130 else:
1131 header_offset = zinfo.header_offset
1132
1133 extra_data = zinfo.extra
1134 if extra:
1135 # Append a ZIP64 field to the extra's
1136 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001137 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001138 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001139
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001140 extract_version = max(45, zinfo.extract_version)
1141 create_version = max(45, zinfo.create_version)
1142 else:
1143 extract_version = zinfo.extract_version
1144 create_version = zinfo.create_version
1145
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001146 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001147 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001148 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001149 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001150 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001151 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001152 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001153 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001154 0, zinfo.internal_attr, zinfo.external_attr,
1155 header_offset)
1156 except DeprecationWarning:
1157 print >>sys.stderr, (structCentralDir,
1158 stringCentralDir, create_version,
1159 zinfo.create_system, extract_version, zinfo.reserved,
1160 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1161 zinfo.CRC, compress_size, file_size,
1162 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1163 0, zinfo.internal_attr, zinfo.external_attr,
1164 header_offset)
1165 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001166 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001167 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001168 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001169 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001170
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001171 pos2 = self.fp.tell()
1172 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001173 centDirCount = count
1174 centDirSize = pos2 - pos1
Martin v. Löwis8c436412008-07-03 12:51:14 +00001175 centDirOffset = pos1
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001176 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1177 centDirOffset > ZIP64_LIMIT or
1178 centDirSize > ZIP64_LIMIT):
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001179 # Need to write the ZIP64 end-of-archive records
1180 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001181 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001182 44, 45, 45, 0, 0, centDirCount, centDirCount,
1183 centDirSize, centDirOffset)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001184 self.fp.write(zip64endrec)
1185
1186 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001187 structEndArchive64Locator,
1188 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001189 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001190 centDirCount = min(centDirCount, 0xFFFF)
1191 centDirSize = min(centDirSize, 0xFFFFFFFF)
1192 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001193
Martin v. Löwis8c436412008-07-03 12:51:14 +00001194 # check for valid comment length
1195 if len(self.comment) >= ZIP_MAX_COMMENT:
1196 if self.debug > 0:
1197 msg = 'Archive comment is too long; truncating to %d bytes' \
1198 % ZIP_MAX_COMMENT
1199 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001200
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001201 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0293c802009-01-17 16:46:35 +00001202 0, 0, centDirCount, centDirCount,
1203 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwis8c436412008-07-03 12:51:14 +00001204 self.fp.write(endrec)
1205 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001206 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001207
Fred Drake3d9091e2001-03-26 15:49:24 +00001208 if not self._filePassed:
1209 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001210 self.fp = None
1211
1212
class PyZipFile(ZipFile):
    """ZipFile subclass that adds compiled Python modules and packages
    to an archive."""

    def writepy(self, pathname, basename = ""):
        """Add the compiled form of the Python code at "pathname".

        A directory containing __init__.py is handled as a package: its
        modules are stored under basename/<directory name>, and package
        subdirectories are added recursively.  Any other directory has
        its *.py files added under "basename" without recursion.
        Otherwise "pathname" must name a *.py file, which is added on its
        own.  What gets stored is always the .pyo or .pyc file selected
        by _get_codename(), which compiles the source when necessary.

        Raises RuntimeError for a file that does not end with ".py".
        """
        parent, leaf = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for entry in os.listdir(pathname):
                full = os.path.join(pathname, entry)
                stem, suffix = os.path.splitext(entry)
                if suffix != ".py":
                    continue
                fname, arcname = self._get_codename(full[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, leaf) if basename else leaf
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for entry in entries:
            full = os.path.join(pathname, entry)
            stem, suffix = os.path.splitext(entry)
            if os.path.isdir(full):
                if os.path.isfile(os.path.join(full, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(full, basename)  # Recursive call
                continue
            if suffix == ".py":
                fname, arcname = self._get_codename(full[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path "pathname".

        "pathname" is a module path without extension.  The .pyo file is
        chosen when it exists and is at least as new as the .py source.
        Otherwise the .pyc file is chosen, after compiling it when it is
        missing or older than the source; a compile error is printed
        rather than raised.  The archive name is the chosen file's base
        name, prefixed with "basename/" when "basename" is non-empty.
        For example, given /python/lib/string this may return
        (/python/lib/string.pyc, string.pyc).
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        if os.path.isfile(optimized) and \
                os.stat(optimized).st_mtime >= os.stat(source).st_mtime:
            fname = optimized    # Use .pyo file
        else:
            fname = compiled
            if not os.path.isfile(compiled) or \
                    os.stat(compiled).st_mtime < os.stat(source).st_mtime:
                import py_compile
                if self.debug:
                    print("Compiling %s" % (source,))
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001310
1311
def main(args = None):
    """Command-line entry point: list, test, extract or create a zipfile.

    'args' is the argument list (default: sys.argv[1:]).  The first
    argument selects the action: -l lists the archive, -t tests it and
    reports the first corrupted member, -e extracts it into a target
    directory and -c creates it from the given sources.  Prints the usage
    text and exits with status 1 when the arguments are not understood.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the first bad member name, or None.
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # An empty tgtdir means "current directory"; makedirs('')
                # would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else is
            # ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()