blob: 66fe07816e12450f4dcee04bb8850f45206576a8 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +00009 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised by this module when a file cannot be handled as a ZIP archive."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000019
20
class LargeZipFile(Exception):
    """Signal that an archive being written needs the ZIP64 extensions.

    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
26
error = BadZipfile      # Alias: BadZipfile is the exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Numeric limits used by this module
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1; larger sizes need ZIP64
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# Identifiers of the Zip file compression methods handled here.
# Other ZIP compression methods are not supported.
ZIP_STORED, ZIP_DEFLATED = 0, 8
37
Martin v. Löwis8c436412008-07-03 12:51:14 +000038# Below are some formats and associated data for reading/writing headers using
39# the struct module. The names and structures of headers/records are those used
40# in the PKWARE description of the ZIP file format:
41# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
42# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000043
# "End of central directory" record: struct format, magic number and size
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked record.  The final two entries
# (_ECD_COMMENT and _ECD_LOCATION) are not part of the structure as defined
# in the spec; this module appends them to the list as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
62
# "Central directory" entry: struct format, magic number and size
# (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields in an unpacked central directory entry
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
89
# "Local file header" record: struct format, magic number and size
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
108
# "Zip64 end of central directory locator": struct format, magic number, size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record: struct format, magic number, size
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked Zip64 end-of-central-directory record
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
130
def _check_zipfile(fp):
    """Return True when an "end of central directory" record is found in fp.

    An IOError raised while probing the file is treated the same as not
    finding the record: the result is False.
    """
    try:
        record = _EndRecData(fp)
    except IOError:
        return False
    # a non-empty record means the file has the correct magic number
    return True if record else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000138
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a "read" attribute is probed directly, anything else is opened in
    binary mode as a path.  An IOError (for example an unreadable path)
    yields False rather than propagating.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # a path: open it here so it is closed again when we are done
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            # already file-like: probe it as given, without closing it
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
153 return result
154
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable file object holding the archive
    offset -- position of the classic "end of central directory" record,
              given relative to the end of the file (seek whence 2)
    endrec -- list built by _EndRecData; it is updated in place and returned

    endrec is returned unchanged when the file is too small to hold the
    ZIP64 structures or when their signatures are not found.  BadZipfile is
    raised for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator, so keep the record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # short read: unpacking would fail, so there is no usable locator
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        # not enough room in front of the locator for the ZIP64 record
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000185 return endrec
186
187
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no valid record can be located, including when
    the file is too short to contain one.  If the record's central
    directory offset is 0xffffffff, the list is passed through
    _EndRecData64 so ZIP64 values can replace the placeholders."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # the file is shorter than a bare record, so it cannot be a ZIP file
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)
        if endrec[_ECD_OFFSET] == 0xffffffff:
            # the value for the "offset of the start of the central directory"
            # indicates that there is a "Zip64 end of central directory"
            # structure present, so go look for it
            return _EndRecData64(fpin, -sizeEndCentDir, endrec)

        return endrec

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # the signature sits too close to the end of the file to be
            # followed by a complete record
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)
            if endrec[_ECD_OFFSET] == 0xffffffff:
                # There is apparently a "Zip64 end of central directory"
                # structure present, so go look for it.  "start" indexes
                # into data, which begins at maxCommentStart, so the
                # end-relative offset must include maxCommentStart.
                return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                     endrec)
            return endrec

    # Unable to find a valid end of central directory structure
    return None
245 return
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246
Fred Drake484d7352000-10-02 21:14:52 +0000247
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the record for one archive member.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/" (the original value is kept
                     in orig_filename)
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the encoded
        file name and the extra data.  When flag bit 0x08 is set, the CRC
        and both sizes are written as zero.  When a size exceeds
        ZIP64_LIMIT, a ZIP64 extra record is appended, the header sizes are
        written as 0xffffffff, and extract_version / create_version on this
        object are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # each version field is raised from its own previous value
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and bit 0x800 is added to the returned flags.  A
        byte-string name is returned untouched with the current flag bits.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 values stored in the extra field to this object.

        The extra data is walked record by record (2-byte type, 2-byte
        length, payload).  For a type 1 (ZIP64) record, each of file_size,
        compress_size and header_offset that currently holds the 0xffffffff
        placeholder is replaced, in that order, by the next 64-bit value of
        the record.  Trailing data too short to hold a record header is
        ignored.  RuntimeError is raised for a ZIP64 record whose length is
        not 0, 8, 16 or at least 24.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # a record header needs 4 bytes; stop on shorter leftovers instead of
        # letting unpack() fail on them
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
394 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000395
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000396
class _ZipDecrypter:
    """Decrypt data stored in a ZIP archive with password-based encryption.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            # shift out eight bits, folding in the polynomial whenever the
            # bit that falls off is set
            for _bit in range(8):
                low_bit_set = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit_set:
                    value ^= polynomial
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in each
        character of pwd."""
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys using the single character c."""
        mask32 = 0xFFFFFFFF
        k0 = self._crc32(c, self.key0)
        k1 = (self.key1 + (k0 & 0xFF)) & mask32
        k1 = (k1 * 0x08088405 + 1) & mask32
        self.key0 = k0
        self.key1 = k1
        # key2 is fed the most significant byte of the new key1
        self.key2 = self._crc32(chr((k1 >> 24) & 0xFF), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        plain = chr(ord(c) ^ (((k * (k ^ 1)) >> 8) & 255))
        # the keys are advanced with the decrypted character
        self._UpdateKeys(plain)
        return plain
454 return c
455
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

       Data is pulled from the underlying file object in pieces, optionally
       decrypted and inflated, and kept in internal buffers until it is
       handed out by read(), readline(), readlines() or iteration.
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Set up reading of one member.

        fileobj -- file object to read the member's stored bytes from
        zipinfo -- object providing compress_type, compress_size and filename
        decrypt -- optional callable applied to every raw character read
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0L            # raw bytes taken from fileobj so far
        self.rawbuffer = ''             # raw data not yet decompressed
        self.readbuffer = ''            # decoded data not yet returned by read()
        self.linebuffer = ''            # data held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''           # separator removed by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            # negative window size: the data is a raw deflate stream
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Turn universal newline handling in readline() on or off."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        """Return self; iteration yields lines through next()."""
        return self

    def next(self):
        """Return the next line, raising StopIteration when none is left."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed; the underlying file object is not
        touched here."""
        self.closed = True

    def _checkfornewline(self):
        """Look for a line separator in the line buffer.

        Returns (position, separator length), or (-1, -1) when the buffer
        is empty or holds no separator.  Separators are tried in the order
        given by self.nlSeps and the first one found wins.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

           A size of 0 returns an empty string.  A line that ended in a
           separator is returned with "\\n" appended in its place.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                # pull data in small pieces (at most 100 characters)
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        # remember the separator so a split "\r\n" pair can be detected later
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size characters of member data.

        With size None, everything currently buffered plus whatever this
        call reads and decodes is returned.  A size of 0 returns an empty
        string.  An empty result for a non-zero size means no more data
        was available.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                # stored data: only fetch what the read buffer still lacks
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            bytes = self.fileobj.read(bytesToRead)
            self.bytes_read += len(bytes)
            self.rawbuffer += bytes

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    # NOTE(review): nothing in this class sets self.eof to
                    # True, so this flush branch looks unreachable - confirm
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            bytes = self.readbuffer
            self.readbuffer = ''
        else:
            bytes = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return bytes
647 return bytes
Tim Petersea5962f2007-03-12 18:07:52 +0000648
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000649
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000650class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000651 """ Class with methods to open, read, write, close, list zip files.
652
Martin v. Löwis8c436412008-07-03 12:51:14 +0000653 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000654
Fred Drake3d9091e2001-03-26 15:49:24 +0000655 file: Either the path to the file, or a file-like object.
656 If it is a path, the file will be opened and closed by ZipFile.
657 mode: The mode can be either read "r", write "w" or append "a".
658 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000659 allowZip64: if True ZipFile will create files with ZIP64 extensions when
660 needed, otherwise it will raise an exception when this would
661 be necessary.
662
Fred Drake3d9091e2001-03-26 15:49:24 +0000663 """
Fred Drake484d7352000-10-02 21:14:52 +0000664
Fred Drake90eac282001-02-28 05:29:34 +0000665 fp = None # Set here since __del__ checks it
666
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file may be a path, which is opened here, or a file-like object, which
    is used as given.  RuntimeError is raised for an unknown mode, for an
    unsupported compression method, and for ZIP_DEFLATED when zlib is
    missing.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # validate the compression method before touching any file
    if compression != ZIP_STORED:
        if compression != ZIP_DEFLATED:
            raise RuntimeError("That compression method is not supported")
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    key = mode.replace('b', '')[0]
    self.mode = key
    self.pwd = None
    self.comment = ''

    # Work out whether we were handed a path or a file-like object
    if not isinstance(file, basestring):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # appending to a file that cannot be opened for update:
            # fall back to creating it
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)
    elif key != 'w':
        # only close a file that was opened here, never one passed in
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
725
726 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000727 """Read the directory, making sure we close the file if the format
728 is bad."""
729 try:
730 self._RealGetContents()
731 except BadZipfile:
732 if not self._filePassed:
733 self.fp.close()
734 self.fp = None
735 raise
736
737 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000738 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000739 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000740 endrec = _EndRecData(fp)
741 if not endrec:
742 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743 if self.debug > 1:
744 print endrec
Martin v. Löwis8c436412008-07-03 12:51:14 +0000745 size_cd = endrec[_ECD_SIZE] # bytes in central directory
746 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
747 self.comment = endrec[_ECD_COMMENT] # archive comment
748
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000749 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwis8c436412008-07-03 12:51:14 +0000750 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrouebcd0ce2008-09-05 23:30:23 +0000751 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
752 # If Zip64 extension structures are present, account for them
Martin v. Löwis8c436412008-07-03 12:51:14 +0000753 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
754
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000755 if self.debug > 2:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000756 inferred = concat + offset_cd
757 print "given, inferred, offset", offset_cd, inferred, concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000758 # self.start_dir: Position of start of central directory
759 self.start_dir = offset_cd + concat
760 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000761 data = fp.read(size_cd)
762 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000763 total = 0
764 while total < size_cd:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000765 centdir = fp.read(sizeCentralDir)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000766 if centdir[0:4] != stringCentralDir:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000767 raise BadZipfile, "Bad magic number for central directory"
768 centdir = struct.unpack(structCentralDir, centdir)
769 if self.debug > 2:
770 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000771 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000772 # Create ZipInfo instance to store file information
773 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000774 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
775 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000776 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000777 (x.create_version, x.create_system, x.extract_version, x.reserved,
778 x.flag_bits, x.compress_type, t, d,
779 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
780 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
781 # Convert date/time code to (year, month, day, hour, min, sec)
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000782 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000784 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000785
786 x._decodeExtra()
787 x.header_offset = x.header_offset + concat
Martin v. Löwis471617d2008-05-05 17:16:58 +0000788 x.filename = x._decodeFilename()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000789 self.filelist.append(x)
790 self.NameToInfo[x.filename] = x
Martin v. Löwis8c436412008-07-03 12:51:14 +0000791
792 # update total bytes read from central directory
793 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
794 + centdir[_CD_EXTRA_FIELD_LENGTH]
795 + centdir[_CD_COMMENT_LENGTH])
796
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000797 if self.debug > 2:
798 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000799
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000800
801 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000802 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000803 l = []
804 for data in self.filelist:
805 l.append(data.filename)
806 return l
807
808 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000809 """Return a list of class ZipInfo instances for files in the
810 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000811 return self.filelist
812
813 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000814 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000815 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
816 for zinfo in self.filelist:
Raymond Hettinger351e1a32008-01-14 22:58:05 +0000817 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000818 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
819
820 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000821 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000822 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000823 for zinfo in self.filelist:
824 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000825 # Read by chunks, to avoid an OverflowError or a
826 # MemoryError with very large embedded files.
827 f = self.open(zinfo.filename, "r")
828 while f.read(chunk_size): # Check CRC-32
829 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000830 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000831 return zinfo.filename
832
833 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000834 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000835 info = self.NameToInfo.get(name)
836 if info is None:
837 raise KeyError(
838 'There is no item named %r in the archive' % name)
839
840 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000841
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000842 def setpassword(self, pwd):
843 """Set default password for encrypted files."""
844 self.pwd = pwd
845
846 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000847 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000848 return self.open(name, "r", pwd).read()
849
850 def open(self, name, mode="r", pwd=None):
851 """Return file-like object for 'name'."""
852 if mode not in ("r", "U", "rU"):
853 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000854 if not self.fp:
855 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000856 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000857
Tim Petersea5962f2007-03-12 18:07:52 +0000858 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000859 # given a file object in the constructor
860 if self._filePassed:
861 zef_file = self.fp
862 else:
863 zef_file = open(self.filename, 'rb')
864
Georg Brandl112aa502008-05-20 08:25:48 +0000865 # Make sure we have an info object
866 if isinstance(name, ZipInfo):
867 # 'name' is already an info object
868 zinfo = name
869 else:
870 # Get info object for name
871 zinfo = self.getinfo(name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000872
873 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000874
875 # Skip the file header:
Martin v. Löwis8c436412008-07-03 12:51:14 +0000876 fheader = zef_file.read(sizeFileHeader)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000877 if fheader[0:4] != stringFileHeader:
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000878 raise BadZipfile, "Bad magic number for file header"
879
880 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000881 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000882 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000883 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000884
885 if fname != zinfo.orig_filename:
886 raise BadZipfile, \
887 'File name in directory "%s" and header "%s" differ.' % (
888 zinfo.orig_filename, fname)
889
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000890 # check for encrypted flag & handle password
891 is_encrypted = zinfo.flag_bits & 0x1
892 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000893 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000894 if not pwd:
895 pwd = self.pwd
896 if not pwd:
897 raise RuntimeError, "File %s is encrypted, " \
898 "password required for extraction" % name
899
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000900 zd = _ZipDecrypter(pwd)
901 # The first 12 bytes in the cypher stream is an encryption header
902 # used to strengthen the algorithm. The first 11 bytes are
903 # completely random, while the 12th contains the MSB of the CRC,
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000904 # or the MSB of the file time depending on the header type
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000905 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000906 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000907 h = map(zd, bytes[0:12])
Gregory P. Smith0c63fc22008-01-20 01:21:03 +0000908 if zinfo.flag_bits & 0x8:
909 # compare against the file type from extended local headers
910 check_byte = (zinfo._raw_time >> 8) & 0xff
911 else:
912 # compare against the CRC otherwise
913 check_byte = (zinfo.CRC >> 24) & 0xff
914 if ord(h[11]) != check_byte:
915 raise RuntimeError("Bad password for file", name)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000916
917 # build and return a ZipExtFile
918 if zd is None:
919 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000920 else:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000921 zef = ZipExtFile(zef_file, zinfo, zd)
922
923 # set universal newlines on ZipExtFile if necessary
924 if "U" in mode:
925 zef.set_univ_newlines(True)
926 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000927
Georg Brandl62416bc2008-01-07 18:47:44 +0000928 def extract(self, member, path=None, pwd=None):
929 """Extract a member from the archive to the current working directory,
930 using its full name. Its file information is extracted as accurately
931 as possible. `member' may be a filename or a ZipInfo object. You can
932 specify a different directory using `path'.
933 """
934 if not isinstance(member, ZipInfo):
935 member = self.getinfo(member)
936
937 if path is None:
938 path = os.getcwd()
939
940 return self._extract_member(member, path, pwd)
941
942 def extractall(self, path=None, members=None, pwd=None):
943 """Extract all members from the archive to the current working
944 directory. `path' specifies a different directory to extract to.
945 `members' is optional and must be a subset of the list returned
946 by namelist().
947 """
948 if members is None:
949 members = self.namelist()
950
951 for zipinfo in members:
952 self.extract(zipinfo, path, pwd)
953
954 def _extract_member(self, member, targetpath, pwd):
955 """Extract the ZipInfo object 'member' to a physical
956 file on the path targetpath.
957 """
958 # build the destination pathname, replacing
959 # forward slashes to platform specific separators.
960 if targetpath[-1:] == "/":
961 targetpath = targetpath[:-1]
962
963 # don't include leading "/" from file name if present
964 if os.path.isabs(member.filename):
965 targetpath = os.path.join(targetpath, member.filename[1:])
966 else:
967 targetpath = os.path.join(targetpath, member.filename)
968
969 targetpath = os.path.normpath(targetpath)
970
971 # Create all upper directories if necessary.
972 upperdirs = os.path.dirname(targetpath)
973 if upperdirs and not os.path.exists(upperdirs):
974 os.makedirs(upperdirs)
975
Georg Brandl112aa502008-05-20 08:25:48 +0000976 source = self.open(member, pwd=pwd)
Georg Brandl62416bc2008-01-07 18:47:44 +0000977 target = file(targetpath, "wb")
978 shutil.copyfileobj(source, target)
979 source.close()
980 target.close()
981
982 return targetpath
983
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000984 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000985 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000986 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000987 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000988 print "Duplicate name:", zinfo.filename
989 if self.mode not in ("w", "a"):
990 raise RuntimeError, 'write() requires mode "w" or "a"'
991 if not self.fp:
992 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000993 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000994 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
995 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000996 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000997 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
998 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000999 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001000 if zinfo.file_size > ZIP64_LIMIT:
1001 if not self._allowZip64:
1002 raise LargeZipFile("Filesize would require ZIP64 extensions")
1003 if zinfo.header_offset > ZIP64_LIMIT:
1004 if not self._allowZip64:
1005 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001006
1007 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001008 """Put the bytes from filename into the archive under the name
1009 arcname."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001010 if not self.fp:
1011 raise RuntimeError(
1012 "Attempt to write to ZIP archive that was already closed")
1013
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001014 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001015 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001016 date_time = mtime[0:6]
1017 # Create ZipInfo instance to store file information
1018 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001019 arcname = filename
1020 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1021 while arcname[0] in (os.sep, os.altsep):
1022 arcname = arcname[1:]
1023 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +00001024 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001025 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001026 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001027 else:
Tim Peterse1190062001-01-15 03:34:38 +00001028 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001029
1030 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001031 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001032 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001033
1034 self._writecheck(zinfo)
1035 self._didModify = True
1036 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +00001037 # Must overwrite CRC and sizes with correct data later
1038 zinfo.CRC = CRC = 0
1039 zinfo.compress_size = compress_size = 0
1040 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001041 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001042 if zinfo.compress_type == ZIP_DEFLATED:
1043 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1044 zlib.DEFLATED, -15)
1045 else:
1046 cmpr = None
1047 while 1:
1048 buf = fp.read(1024 * 8)
1049 if not buf:
1050 break
1051 file_size = file_size + len(buf)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001052 CRC = crc32(buf, CRC) & 0xffffffff
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001053 if cmpr:
1054 buf = cmpr.compress(buf)
1055 compress_size = compress_size + len(buf)
1056 self.fp.write(buf)
1057 fp.close()
1058 if cmpr:
1059 buf = cmpr.flush()
1060 compress_size = compress_size + len(buf)
1061 self.fp.write(buf)
1062 zinfo.compress_size = compress_size
1063 else:
1064 zinfo.compress_size = file_size
1065 zinfo.CRC = CRC
1066 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001067 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001068 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001069 self.fp.seek(zinfo.header_offset + 14, 0)
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001070 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001071 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001072 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001073 self.filelist.append(zinfo)
1074 self.NameToInfo[zinfo.filename] = zinfo
1075
Just van Rossumb083cb32002-12-12 12:23:32 +00001076 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +00001077 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +00001078 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1079 the name of the file in the archive."""
1080 if not isinstance(zinfo_or_arcname, ZipInfo):
1081 zinfo = ZipInfo(filename=zinfo_or_arcname,
Raymond Hettinger351e1a32008-01-14 22:58:05 +00001082 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001083 zinfo.compress_type = self.compression
Antoine Pitrou5fdfa3e2008-07-25 19:42:26 +00001084 zinfo.external_attr = 0600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001085 else:
1086 zinfo = zinfo_or_arcname
Georg Brandl4b3ab6f2007-07-12 09:59:22 +00001087
1088 if not self.fp:
1089 raise RuntimeError(
1090 "Attempt to write to ZIP archive that was already closed")
1091
Tim Peterse1190062001-01-15 03:34:38 +00001092 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001093 zinfo.header_offset = self.fp.tell() # Start of header bytes
1094 self._writecheck(zinfo)
1095 self._didModify = True
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001096 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001097 if zinfo.compress_type == ZIP_DEFLATED:
1098 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1099 zlib.DEFLATED, -15)
1100 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +00001101 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001102 else:
1103 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +00001104 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001105 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001106 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001107 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001108 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001109 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001110 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001111 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001112 self.filelist.append(zinfo)
1113 self.NameToInfo[zinfo.filename] = zinfo
1114
1115 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001116 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001117 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001118
1119 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001120 """Close the file, and for mode "w" and "a" write the ending
1121 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001122 if self.fp is None:
1123 return
Tim Petersa608bb22006-06-15 18:06:29 +00001124
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001125 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001126 count = 0
1127 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001128 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001129 count = count + 1
1130 dt = zinfo.date_time
1131 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001132 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001133 extra = []
1134 if zinfo.file_size > ZIP64_LIMIT \
1135 or zinfo.compress_size > ZIP64_LIMIT:
1136 extra.append(zinfo.file_size)
1137 extra.append(zinfo.compress_size)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001138 file_size = 0xffffffff
1139 compress_size = 0xffffffff
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001140 else:
1141 file_size = zinfo.file_size
1142 compress_size = zinfo.compress_size
1143
1144 if zinfo.header_offset > ZIP64_LIMIT:
1145 extra.append(zinfo.header_offset)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001146 header_offset = 0xffffffffL
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001147 else:
1148 header_offset = zinfo.header_offset
1149
1150 extra_data = zinfo.extra
1151 if extra:
1152 # Append a ZIP64 field to the extra's
1153 extra_data = struct.pack(
Gregory P. Smithb89a0962008-03-19 01:46:10 +00001154 '<HH' + 'Q'*len(extra),
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001155 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001156
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001157 extract_version = max(45, zinfo.extract_version)
1158 create_version = max(45, zinfo.create_version)
1159 else:
1160 extract_version = zinfo.extract_version
1161 create_version = zinfo.create_version
1162
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001163 try:
Martin v. Löwis471617d2008-05-05 17:16:58 +00001164 filename, flag_bits = zinfo._encodeFilenameFlags()
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001165 centdir = struct.pack(structCentralDir,
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001166 stringCentralDir, create_version,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001167 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001168 flag_bits, zinfo.compress_type, dostime, dosdate,
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001169 zinfo.CRC, compress_size, file_size,
Martin v. Löwis471617d2008-05-05 17:16:58 +00001170 len(filename), len(extra_data), len(zinfo.comment),
Gregory P. Smithbf02e3b2008-03-19 03:14:41 +00001171 0, zinfo.internal_attr, zinfo.external_attr,
1172 header_offset)
1173 except DeprecationWarning:
1174 print >>sys.stderr, (structCentralDir,
1175 stringCentralDir, create_version,
1176 zinfo.create_system, extract_version, zinfo.reserved,
1177 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1178 zinfo.CRC, compress_size, file_size,
1179 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1180 0, zinfo.internal_attr, zinfo.external_attr,
1181 header_offset)
1182 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001183 self.fp.write(centdir)
Martin v. Löwis471617d2008-05-05 17:16:58 +00001184 self.fp.write(filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001185 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001186 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001187
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001188 pos2 = self.fp.tell()
1189 # Write end-of-zip-archive record
Martin v. Löwis8c436412008-07-03 12:51:14 +00001190 centDirOffset = pos1
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001191 if pos1 > ZIP64_LIMIT:
1192 # Need to write the ZIP64 end-of-archive records
1193 zip64endrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001194 structEndArchive64, stringEndArchive64,
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001195 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1196 self.fp.write(zip64endrec)
1197
1198 zip64locrec = struct.pack(
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001199 structEndArchive64Locator,
1200 stringEndArchive64Locator, 0, pos2, 1)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001201 self.fp.write(zip64locrec)
Martin v. Löwis8c436412008-07-03 12:51:14 +00001202 centDirOffset = 0xFFFFFFFF
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001203
Martin v. Löwis8c436412008-07-03 12:51:14 +00001204 # check for valid comment length
1205 if len(self.comment) >= ZIP_MAX_COMMENT:
1206 if self.debug > 0:
1207 msg = 'Archive comment is too long; truncating to %d bytes' \
1208 % ZIP_MAX_COMMENT
1209 self.comment = self.comment[:ZIP_MAX_COMMENT]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001210
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +00001211 endrec = struct.pack(structEndArchive, stringEndArchive,
Martin v. Löwis8c436412008-07-03 12:51:14 +00001212 0, 0, count % ZIP_FILECOUNT_LIMIT,
1213 count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
1214 centDirOffset, len(self.comment))
1215 self.fp.write(endrec)
1216 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001217 self.fp.flush()
Martin v. Löwis8c436412008-07-03 12:51:14 +00001218
Fred Drake3d9091e2001-03-26 15:49:24 +00001219 if not self._filePassed:
1220 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001221 self.fp = None
1222
1223
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving compiled Python modules and packages."""

    def writepy(self, pathname, basename = ""):
        """Add the compiled module(s) found at "pathname" to the archive.

        Three kinds of "pathname" are handled:

        * a package directory (one containing __init__.py): the package's
          modules and, recursively, its sub-packages are added below
          "basename/<package name>";
        * any other directory: every *.py file directly inside it is added
          below "basename";
        * anything else must be a path ending in ".py" (RuntimeError
          otherwise) and is added as a single module.

        What gets stored is always the compiled file chosen by
        _get_codename(), which compiles the source when necessary.
        """
        def add_module(source_path, prefix, label):
            # Archive the compiled counterpart of one .py file.
            fname, arcname = self._get_codename(source_path[0:-3], prefix)
            if self.debug:
                print("%s %s" % (label, arcname))
            self.write(fname, arcname)

        parent, name = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename), basename,
                               "Adding")
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        add_module(initname, basename, "Adding")
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, basename, "Adding")

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        "pathname" is a module path without extension.  An up-to-date
        .pyo file is preferred, then an up-to-date .pyc file; otherwise the
        .py source is compiled to .pyc first (a compile error is printed,
        not raised).  The archive name is the chosen file's base name,
        prefixed with "basename/" when basename is non-empty.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        def up_to_date(compiled):
            # True when 'compiled' is at least as new as the source file.
            return os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime

        if os.path.isfile(file_pyo) and up_to_date(file_pyo):
            fname = file_pyo    # Use .pyo file
        elif os.path.isfile(file_pyc) and up_to_date(file_pyc):
            fname = file_pyc
        else:
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001321
1322
def main(args = None):
    """Command-line interface; 'args' defaults to sys.argv[1:].

    Supports listing (-l), testing (-t), extracting (-e) and creating (-c)
    an archive.  Prints the usage text and exits with status 1 when the
    arguments do not match one of these forms.

    With -t, the name of the first member reported bad by testzip() is
    printed before "Done testing".
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # A directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the content of a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1395
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()