blob: 93a0b75edb558cca8d55c4e4229842786d6f4c34 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Martin v. Löwis00756902006-02-05 17:09:41 +00004import struct, os, time, sys
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +00009except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000010 zlib = None
11
Skip Montanaro40fc1602001-03-01 04:27:19 +000012__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000013 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000014
class BadZipfile(Exception):
    """Raised by this module when ZIP data cannot be read as expected."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that would need the ZIP64 extensions
    while those extensions are disabled.
    """


# The exception raised by this module (alias of BadZipfile).
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
# Sizes and offsets above this value take the ZIP64 code paths.
ZIP64_LIMIT = (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# struct module formats and magic numbers used when reading headers.

# End of archive record: 8 items, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4HlLL5HLl"
stringCentralDir = "PK\001\002"

# Local file header record: 12 items, 30 bytes.
structFileHeader = "<4s2B4HlLL2H"
stringFileHeader = "PK\003\004"

# Zip64 end-of-archive locator: 4 items, 20 bytes.
structEndArchive64Locator = "<4slql"
stringEndArchive64Locator = "PK\x06\x07"

# Zip64 end-of-archive record: 10 items, 56 bytes.
structEndArchive64 = "<4sqhhllqqqq"
stringEndArchive64 = "PK\x06\x06"


# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,                 # is this meaningful?
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# indexes of entries in the local file header structure
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,                 # is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
81
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when _EndRecData() finds an "End of Central Directory"
    record in the file named by `filename`.  Returns False when no record
    is found or when opening/reading the file raises IOError.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when _EndRecData() raises (e.g. an
            # IOError from a failed seek); it used to leak in that case.
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000093
def _EndRecData64(fpin, offset, endrec):
    """
    Look for the ZIP64 end-of-archive locator and record in front of the
    classic end record and, when both are present, copy their values
    into endrec.

    `offset` is the position of the classic end record relative to the end
    of the file.  `endrec` is returned unchanged when either ZIP64
    signature is missing; BadZipfile is raised for multi-disk archives.
    """
    locator_len = struct.calcsize(structEndArchive64Locator)
    fpin.seek(offset - locator_len, 2)
    locator = struct.unpack(structEndArchive64Locator, fpin.read(locator_len))
    sig, diskno, reloff, disks = locator
    if sig != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    record_len = struct.calcsize(structEndArchive64)
    fpin.seek(offset - locator_len - record_len, 2)
    record = struct.unpack(structEndArchive64, fpin.read(record_len))
    if record[0] != stringEndArchive64:
        return endrec

    # Record layout: sig, sz, create_version, read_version, disk_num,
    # disk_dir, dircount, dircount2, dirsize, diroffset.  The last six
    # values replace items 1..6 of the original endrec.
    position = 1
    for value in record[4:10]:
        endrec[position] = value
        position += 1
    return endrec
126
127
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items in the ZIP "End of central dir"
    record followed by a ninth item, the archive comment, and a tenth
    item, the file seek offset of this record.  When the central
    directory offset field holds the ZIP64 marker value the list is passed
    through _EndRecData64() before being returned.

    None is returned when no usable record is found, including when the
    signature is found too close to the end of the file for a complete
    22-byte record to follow it.
    """
    fpin.seek(-22, 2)               # Assume no archive comment.
    filesize = fpin.tell() + 22     # Get file size
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        # endrec[-4] is item 6, the central directory offset.
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:     # Correct signature string was found
        record = data[start:start+22]
        if len(record) != 22:
            # Signature sits too close to the end of the file: the record
            # is truncated.  Report "no record" instead of letting
            # struct.unpack raise struct.error.
            return None
        endrec = list(struct.unpack(structEndArchive, record))
        comment = data[start+22:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None      # Error, return None
165
Fred Drake484d7352000-10-02 21:14:52 +0000166
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named `filename` stamped with `date_time`.

        `date_time` is a (year, month, day, hour, min, sec) sequence.
        header_offset, CRC, compress_size and file_size are left unset
        here; they are filled in by class ZipFile.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When flag bit 0x08 is set the CRC and both sizes are written as
        zero.  When a size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended, the header sizes are replaced by 0xffffffff and both
        version attributes of this object are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version discarded a higher existing create_version.
            self.create_version = max(45, self.create_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Decode the ZIP64 record (header id 1) found in self.extra.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record whenever they currently
        hold the marker value (-1 or 0xFFFFFFFF).  Records with any other
        header id are skipped.  Raises RuntimeError for a ZIP64 record with
        an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Each record needs a 4-byte id/length prefix; shorter trailing
        # bytes are ignored rather than fed to struct.unpack.
        while len(extra) >= 4:
            # Id and length are read as unsigned so that a length of
            # 0x8000 or more does not turn negative and corrupt the skip.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000297
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000298
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    for low-level security.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc = crc ^ poly
            table.append(crc)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed starting keys, then mixed with every password character.
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for ch in pwd:
            self._UpdateKeys(ch)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext character c."""
        mask = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 255)) & mask
        self.key1 = (key1 * 134775813 + 1) & mask
        top_byte = chr((self.key1 >> 24) & 255)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        plain = chr(ord(c) ^ (((k * (k ^ 1)) >> 8) & 255))
        self._UpdateKeys(plain)
        return plain
357
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000358class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000359 """ Class with methods to open, read, write, close, list zip files.
360
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000362
Fred Drake3d9091e2001-03-26 15:49:24 +0000363 file: Either the path to the file, or a file-like object.
364 If it is a path, the file will be opened and closed by ZipFile.
365 mode: The mode can be either read "r", write "w" or append "a".
366 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000367 allowZip64: if True ZipFile will create files with ZIP64 extensions when
368 needed, otherwise it will raise an exception when this would
369 be necessary.
370
Fred Drake3d9091e2001-03-26 15:49:24 +0000371 """
Fred Drake484d7352000-10-02 21:14:52 +0000372
Fred Drake90eac282001-02-28 05:29:34 +0000373 fp = None # Set here since __del__ checks it
374
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000375 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000376 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000377 self._allowZip64 = allowZip64
378 self._didModify = False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000379 if compression == ZIP_STORED:
380 pass
381 elif compression == ZIP_DEFLATED:
382 if not zlib:
383 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000384 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000385 else:
386 raise RuntimeError, "That compression method is not supported"
Tim Peterse1190062001-01-15 03:34:38 +0000387 self.debug = 0 # Level of printing: 0 through 3
388 self.NameToInfo = {} # Find file info given name
389 self.filelist = [] # List of ZipInfo instances for archive
390 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000391 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000392 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000393
Fred Drake3d9091e2001-03-26 15:49:24 +0000394 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000395 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000396 self._filePassed = 0
397 self.filename = file
398 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
399 self.fp = open(file, modeDict[mode])
400 else:
401 self._filePassed = 1
402 self.fp = file
403 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000404
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000405 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000406 self._GetContents()
407 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000408 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000409 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000410 try: # See if file is a zip file
411 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000412 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000413 self.fp.seek(self.start_dir, 0)
414 except BadZipfile: # file is not a zip file, just append
415 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000416 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000417 if not self._filePassed:
418 self.fp.close()
419 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000420 raise RuntimeError, 'Mode must be "r", "w" or "a"'
421
422 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000423 """Read the directory, making sure we close the file if the format
424 is bad."""
425 try:
426 self._RealGetContents()
427 except BadZipfile:
428 if not self._filePassed:
429 self.fp.close()
430 self.fp = None
431 raise
432
433 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000434 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000435 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000436 endrec = _EndRecData(fp)
437 if not endrec:
438 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000439 if self.debug > 1:
440 print endrec
Tim Peterse1190062001-01-15 03:34:38 +0000441 size_cd = endrec[5] # bytes in central directory
442 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000443 self.comment = endrec[8] # archive comment
444 # endrec[9] is the offset of the "End of Central Dir" record
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000445 if endrec[9] > ZIP64_LIMIT:
446 x = endrec[9] - size_cd - 56 - 20
447 else:
448 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000449 # "concat" is zero, unless zip was concatenated to another file
450 concat = x - offset_cd
451 if self.debug > 2:
452 print "given, inferred, offset", offset_cd, x, concat
453 # self.start_dir: Position of start of central directory
454 self.start_dir = offset_cd + concat
455 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000456 data = fp.read(size_cd)
457 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000458 total = 0
459 while total < size_cd:
460 centdir = fp.read(46)
461 total = total + 46
462 if centdir[0:4] != stringCentralDir:
463 raise BadZipfile, "Bad magic number for central directory"
464 centdir = struct.unpack(structCentralDir, centdir)
465 if self.debug > 2:
466 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000467 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000468 # Create ZipInfo instance to store file information
469 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000470 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
471 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
472 total = (total + centdir[_CD_FILENAME_LENGTH]
473 + centdir[_CD_EXTRA_FIELD_LENGTH]
474 + centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000475 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000476 (x.create_version, x.create_system, x.extract_version, x.reserved,
477 x.flag_bits, x.compress_type, t, d,
478 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
479 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
480 # Convert date/time code to (year, month, day, hour, min, sec)
481 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000482 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000483
484 x._decodeExtra()
485 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000486 self.filelist.append(x)
487 self.NameToInfo[x.filename] = x
488 if self.debug > 2:
489 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000490
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000491
492 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000493 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000494 l = []
495 for data in self.filelist:
496 l.append(data.filename)
497 return l
498
499 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000500 """Return a list of class ZipInfo instances for files in the
501 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000502 return self.filelist
503
504 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000505 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000506 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
507 for zinfo in self.filelist:
508 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
509 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
510
511 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000512 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000513 for zinfo in self.filelist:
514 try:
Tim Peterse1190062001-01-15 03:34:38 +0000515 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000516 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000517 return zinfo.filename
518
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000519
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000520 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000521 """Return the instance of ZipInfo given 'name'."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000522 return self.NameToInfo[name]
523
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000524 def setpassword(self, pwd):
525 """Set default password for encrypted files."""
526 self.pwd = pwd
527
528 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000529 """Return file bytes (as a string) for name."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000530 if self.mode not in ("r", "a"):
531 raise RuntimeError, 'read() requires mode "r" or "a"'
532 if not self.fp:
533 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000534 "Attempt to read ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000535 zinfo = self.getinfo(name)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000536 is_encrypted = zinfo.flag_bits & 0x1
537 if is_encrypted:
538 if not pwd:
539 pwd = self.pwd
540 if not pwd:
541 raise RuntimeError, "File %s is encrypted, " \
542 "password required for extraction" % name
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000543 filepos = self.fp.tell()
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000544
545 self.fp.seek(zinfo.header_offset, 0)
546
547 # Skip the file header:
548 fheader = self.fp.read(30)
549 if fheader[0:4] != stringFileHeader:
550 raise BadZipfile, "Bad magic number for file header"
551
552 fheader = struct.unpack(structFileHeader, fheader)
553 fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
554 if fheader[_FH_EXTRA_FIELD_LENGTH]:
555 self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
556
557 if fname != zinfo.orig_filename:
558 raise BadZipfile, \
559 'File name in directory "%s" and header "%s" differ.' % (
560 zinfo.orig_filename, fname)
561
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000562 bytes = self.fp.read(zinfo.compress_size)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000563 # Go with decryption
564 if is_encrypted:
565 zd = _ZipDecrypter(pwd)
566 # The first 12 bytes in the cypher stream is an encryption header
567 # used to strengthen the algorithm. The first 11 bytes are
568 # completely random, while the 12th contains the MSB of the CRC,
569 # and is used to check the correctness of the password.
570 h = map(zd, bytes[0:12])
571 if ord(h[11]) != ((zinfo.CRC>>24)&255):
572 raise RuntimeError, "Bad password for file %s" % name
573 bytes = "".join(map(zd, bytes[12:]))
574 # Go with decompression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000575 self.fp.seek(filepos, 0)
576 if zinfo.compress_type == ZIP_STORED:
577 pass
578 elif zinfo.compress_type == ZIP_DEFLATED:
579 if not zlib:
580 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000581 "De-compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000582 # zlib compress/decompress code by Jeremy Hylton of CNRI
583 dc = zlib.decompressobj(-15)
584 bytes = dc.decompress(bytes)
585 # need to feed in unused pad byte so that zlib won't choke
586 ex = dc.decompress('Z') + dc.flush()
587 if ex:
588 bytes = bytes + ex
589 else:
590 raise BadZipfile, \
Fred Drake5db246d2000-09-29 20:44:48 +0000591 "Unsupported compression method %d for file %s" % \
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000592 (zinfo.compress_type, name)
593 crc = binascii.crc32(bytes)
594 if crc != zinfo.CRC:
595 raise BadZipfile, "Bad CRC-32 for file %s" % name
596 return bytes
597
598 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000599 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000600 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000601 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000602 print "Duplicate name:", zinfo.filename
603 if self.mode not in ("w", "a"):
604 raise RuntimeError, 'write() requires mode "w" or "a"'
605 if not self.fp:
606 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000607 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000608 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
609 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000610 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000611 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
612 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000613 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000614 if zinfo.file_size > ZIP64_LIMIT:
615 if not self._allowZip64:
616 raise LargeZipFile("Filesize would require ZIP64 extensions")
617 if zinfo.header_offset > ZIP64_LIMIT:
618 if not self._allowZip64:
619 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000620
621 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000622 """Put the bytes from filename into the archive under the name
623 arcname."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000624 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000625 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000626 date_time = mtime[0:6]
627 # Create ZipInfo instance to store file information
628 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000629 arcname = filename
630 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
631 while arcname[0] in (os.sep, os.altsep):
632 arcname = arcname[1:]
633 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +0000634 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000635 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +0000636 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000637 else:
Tim Peterse1190062001-01-15 03:34:38 +0000638 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000639
640 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000641 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +0000642 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000643
644 self._writecheck(zinfo)
645 self._didModify = True
646 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +0000647 # Must overwrite CRC and sizes with correct data later
648 zinfo.CRC = CRC = 0
649 zinfo.compress_size = compress_size = 0
650 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000651 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000652 if zinfo.compress_type == ZIP_DEFLATED:
653 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
654 zlib.DEFLATED, -15)
655 else:
656 cmpr = None
657 while 1:
658 buf = fp.read(1024 * 8)
659 if not buf:
660 break
661 file_size = file_size + len(buf)
662 CRC = binascii.crc32(buf, CRC)
663 if cmpr:
664 buf = cmpr.compress(buf)
665 compress_size = compress_size + len(buf)
666 self.fp.write(buf)
667 fp.close()
668 if cmpr:
669 buf = cmpr.flush()
670 compress_size = compress_size + len(buf)
671 self.fp.write(buf)
672 zinfo.compress_size = compress_size
673 else:
674 zinfo.compress_size = file_size
675 zinfo.CRC = CRC
676 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000677 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +0000678 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +0000679 self.fp.seek(zinfo.header_offset + 14, 0)
Brett Cannonff450f72004-07-10 19:09:20 +0000680 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000681 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +0000682 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000683 self.filelist.append(zinfo)
684 self.NameToInfo[zinfo.filename] = zinfo
685
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a plain name is given, a ZipInfo is created for it using the
    current local time and the archive's default compression
    (self.compression).  When a ZipInfo is given, it is used as is and
    its file_size, header_offset, CRC and compress_size are filled in
    here.

    Raises RuntimeError if the archive has already been closed.
    """
    # Fail with a clear error instead of an AttributeError from
    # None.tell() when the archive was closed earlier.
    if self.fp is None:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time()))
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum (of raw data)
    if zinfo.compress_type == ZIP_DEFLATED:
        # wbits=-15 makes zlib emit a raw deflate stream
        # (no zlib header/trailer).
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    # Nothing has been written since header_offset was recorded above,
    # so it still points at the start of this member's header.
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    # Flush only once everything belonging to this member is written.
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
718
def __del__(self):
    """Finalizer: invoke close() for callers that never did so themselves."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed (self.fp is None).
    When the archive was opened in mode "w" or "a" and has been
    modified, the central directory and the end-of-archive record are
    written first; the ZIP64 end-of-archive record and locator are
    added when the central directory starts beyond ZIP64_LIMIT.

    self.fp is always reset to None, and the underlying file object is
    closed unless self._filePassed is true, even when writing the
    ending records raises.  The exception still propagates, but the
    handle is not leaked and a later close()/__del__ will not retry
    the failed write.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()           # start of central directory
            for zinfo in self.filelist:     # write central directory
                count = count + 1
                dt = zinfo.date_time
                # Pack the date/time tuple into the 16-bit DOS fields.
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values too large for the 32-bit fields are collected
                # in 'extra' and replaced by all-ones markers.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format:  32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's: header id 1,
                    # payload length, then one 8-byte value per entry.
                    extra_data = struct.pack(
                        '<hh' + 'q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(zinfo.filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()           # end of central directory
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                # The classic record carries -1 as the directory offset;
                # the real offset is in the ZIP64 record written above.
                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Drop our reference first so the archive counts as closed even
        # if closing the underlying file object itself fails.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
808
809
810class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +0000811 """Class to create ZIP archives with Python library files and packages."""
812
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000813 def writepy(self, pathname, basename = ""):
814 """Add all files from "pathname" to the ZIP archive.
815
Fred Drake484d7352000-10-02 21:14:52 +0000816 If pathname is a package directory, search the directory and
817 all package subdirectories recursively for all *.py and enter
818 the modules into the archive. If pathname is a plain
819 directory, listdir *.py and enter all modules. Else, pathname
820 must be a Python *.py file and the module will be put into the
821 archive. Added modules are always module.pyo or module.pyc.
822 This method will compile the module.py into module.pyc if
823 necessary.
824 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000825 dir, name = os.path.split(pathname)
826 if os.path.isdir(pathname):
827 initname = os.path.join(pathname, "__init__.py")
828 if os.path.isfile(initname):
829 # This is a package directory, add it
830 if basename:
831 basename = "%s/%s" % (basename, name)
832 else:
833 basename = name
834 if self.debug:
835 print "Adding package in", pathname, "as", basename
836 fname, arcname = self._get_codename(initname[0:-3], basename)
837 if self.debug:
838 print "Adding", arcname
839 self.write(fname, arcname)
840 dirlist = os.listdir(pathname)
841 dirlist.remove("__init__.py")
842 # Add all *.py files and package subdirectories
843 for filename in dirlist:
844 path = os.path.join(pathname, filename)
845 root, ext = os.path.splitext(filename)
846 if os.path.isdir(path):
847 if os.path.isfile(os.path.join(path, "__init__.py")):
848 # This is a package directory, add it
849 self.writepy(path, basename) # Recursive call
850 elif ext == ".py":
851 fname, arcname = self._get_codename(path[0:-3],
852 basename)
853 if self.debug:
854 print "Adding", arcname
855 self.write(fname, arcname)
856 else:
857 # This is NOT a package directory, add its files at top level
858 if self.debug:
859 print "Adding files from directory", pathname
860 for filename in os.listdir(pathname):
861 path = os.path.join(pathname, filename)
862 root, ext = os.path.splitext(filename)
863 if ext == ".py":
864 fname, arcname = self._get_codename(path[0:-3],
865 basename)
866 if self.debug:
867 print "Adding", arcname
868 self.write(fname, arcname)
869 else:
870 if pathname[-3:] != ".py":
871 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000872 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000873 fname, arcname = self._get_codename(pathname[0:-3], basename)
874 if self.debug:
875 print "Adding file", arcname
876 self.write(fname, arcname)
877
878 def _get_codename(self, pathname, basename):
879 """Return (filename, archivename) for the path.
880
Fred Drake484d7352000-10-02 21:14:52 +0000881 Given a module name path, return the correct file path and
882 archive name, compiling if necessary. For example, given
883 /python/lib/string, return (/python/lib/string.pyc, string).
884 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000885 file_py = pathname + ".py"
886 file_pyc = pathname + ".pyc"
887 file_pyo = pathname + ".pyo"
888 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000889 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +0000890 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000891 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000892 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +0000893 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000894 if self.debug:
895 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +0000896 try:
897 py_compile.compile(file_py, file_pyc, None, True)
898 except py_compile.PyCompileError,err:
899 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000900 fname = file_pyc
901 else:
902 fname = file_pyc
903 archivename = os.path.split(fname)[1]
904 if basename:
905 archivename = "%s/%s" % (basename, archivename)
906 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000907
908
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    "args" is the argument list (defaults to sys.argv[1:]); its first
    item selects the action:

        -l zipfile            print a listing of the archive
        -t zipfile            check the archive's members
        -e zipfile target     extract all members below directory target
        -c zipfile src ...    create the archive from files/directories

    On a missing or malformed command line the usage text is printed
    and the process exits with status 1.  Archives and output files are
    closed even when an operation fails part-way.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first bad member, if any.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        # Absolute target directory with a trailing separator, used to
        # verify that every member stays below it.
        out_prefix = os.path.join(os.path.abspath(out), '')
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and are untrusted:
                # refuse absolute names and ".." components that would
                # write outside the target directory.
                tgt_abs = os.path.join(os.path.abspath(tgt), '')
                if not tgt_abs.startswith(out_prefix):
                    print("Skipping %r: path is outside the target directory"
                          % (path,))
                    continue

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting a top-level member into ''.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store a file, or recurse into a directory; anything else
            # (e.g. a missing path) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
981
# Run the command-line interface (main() reads sys.argv[1:] when called
# without arguments) only when this file is executed as a script.
if __name__ == "__main__":
    main()