blob: d0a1f6549ab88a6fd067bc83cc60ecdb426d5dd5 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
3"""
Martin v. Löwis00756902006-02-05 17:09:41 +00004import struct, os, time, sys
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +00009except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000010 zlib = None
11
Skip Montanaro40fc1602001-03-01 04:27:19 +000012__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000013 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000014
class BadZipfile(Exception):
    """Raised by this module when an archive is missing or malformed."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """


# The exception raised by this module (alias kept for callers using it).
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
# Sizes and offsets above this value need the ZIP64 extensions.
ZIP64_LIMIT = 2 ** 31 - 1

# Constants for ZIP file compression methods.
# Other ZIP compression methods are not supported.
ZIP_STORED, ZIP_DEFLATED = 0, 8

# struct module formats and magic numbers used for reading headers.

# End of archive record: 8 items, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4HlLL5HLl"
stringCentralDir = "PK\001\002"

# Local file header record: 12 items, 30 bytes.
structFileHeader = "<4s2B4HlLL2H"
stringFileHeader = "PK\003\004"

# ZIP64 end-of-archive locator: 4 items, 20 bytes.
structEndArchive64Locator = "<4slql"
stringEndArchive64Locator = "PK\x06\x07"

# ZIP64 end-of-archive record: 10 items, 56 bytes.
structEndArchive64 = "<4sqhhllqqqq"
stringEndArchive64 = "PK\x06\x06"


# Indexes of entries in the central directory structure.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,                # is this meaningful?
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# Indexes of entries in the local file header structure.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,                # is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
81
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when an "End of Central Directory" record can be located
    in the file named by `filename`, and False otherwise -- including when
    the file cannot be opened or read (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when probing the file fails.
            fpin.close()
    except IOError:
        return False
    return bool(endrec)             # truthy: file has correct magic number
Guido van Rossum32abe6f2000-03-31 17:30:02 +000093
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- open archive file object
    offset -- position of the classic end-of-archive record, relative to
              the end of the file (it is used with seek(..., 2))
    endrec -- list built from the classic record; updated in place

    Returns `endrec`.  It is returned unchanged when the file holds no
    ZIP64 locator or ZIP64 record in front of the classic record (wrong
    signature, or the file is too short to contain them).
    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # Seeking before the start of the file: there is no room for a
        # ZIP64 locator, so keep the record we were given.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1:7] = [disk_num, disk_dir, dircount, dircount2,
                   dirsize, diroffset]
    return endrec
126
127
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items unpacked from the ZIP "End of
    central dir" record, followed by the archive comment and, as the last
    item, the file seek offset of this record (ten entries in total).
    None is returned when no such record can be located, including when
    the file is too short to hold one.
    """
    recordSize = 22                 # size of a record without comment
    fpin.seek(0, 2)
    filesize = fpin.tell()          # Get file size
    if filesize < recordSize:
        # Too short to contain even a comment-less end-of-archive record;
        # seeking to -22 from the end would fail on such a file.
        return None
    fpin.seek(-recordSize, 2)       # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")                       # Append the archive comment
        endrec.append(filesize - recordSize)    # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recordSize, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:                  # Correct signature string was found
        record = data[start:start + recordSize]
        if len(record) != recordSize:
            # Signature sits too close to the end: the record is cut off.
            return None
        endrec = list(struct.unpack(structEndArchive, record))
        comment = data[start + recordSize:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None                     # Error, return None
164 return # Error, return None
165
Fred Drake484d7352000-10-02 21:14:52 +0000166
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/" for the `filename` attribute,
                     while `orig_filename` keeps the value as given
        date_time -- (year, month, day, hour, min, sec)
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field carrying
        the real sizes is appended, the sizes in the header are replaced by
        0xffffffff, and extract_version / create_version are raised to at
        least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Try to decode the extra field.

        Walks the (id, length, payload) records stored in `self.extra`.
        For the ZIP64 record (id 1) the 64-bit values replace, in order,
        file_size, compress_size and header_offset -- but only for those
        attributes that currently hold the placeholder -1 / 0xFFFFFFFF.

        Raises RuntimeError when the ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record header is 4 bytes; a shorter trailing fragment cannot be
        # a record and is ignored.
        while len(extra) >= 4:
            # Both fields are unsigned: a signed length could become
            # negative and keep the slice below from ever shrinking.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000297
298
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    for low-level security.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for seed in range(256):
            value = seed
            for _ in range(8):
                shifted = (value >> 1) & 0x7FFFFFFF
                if value & 1:
                    shifted ^= poly
                value = shifted
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        slot = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[slot]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in `pwd`."""
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys with one character `c`."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        stepped = (self.key1 + (self.key0 & 255)) & mask32
        self.key1 = (stepped * 0x08088405 + 1) & mask32
        top_byte = chr((self.key1 >> 24) & 255)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        # XXX When this is called with a byte instead of a char, ord()
        # isn't needed.  Don't die in that case.  In the future we should
        # just leave this out, once we're always using bytes.
        try:
            code = ord(c)
        except TypeError:
            code = c
        k = self.key2 | 2
        code ^= ((k * (k ^ 1)) >> 8) & 255
        plain = chr(code)
        self._UpdateKeys(plain)
        return plain
363
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000364class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000365 """ Class with methods to open, read, write, close, list zip files.
366
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000367 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000368
Fred Drake3d9091e2001-03-26 15:49:24 +0000369 file: Either the path to the file, or a file-like object.
370 If it is a path, the file will be opened and closed by ZipFile.
371 mode: The mode can be either read "r", write "w" or append "a".
372 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000373 allowZip64: if True ZipFile will create files with ZIP64 extensions when
374 needed, otherwise it will raise an exception when this would
375 be necessary.
376
Fred Drake3d9091e2001-03-26 15:49:24 +0000377 """
Fred Drake484d7352000-10-02 21:14:52 +0000378
Fred Drake90eac282001-02-28 05:29:34 +0000379 fp = None # Set here since __del__ checks it
380
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file        -- path of the archive, or a file-like object
    mode        -- "r", "w" or "a"; a "b" in the mode string is ignored
    compression -- ZIP_STORED or ZIP_DEFLATED (the latter needs zlib)
    allowZip64  -- stored for the write checks that decide whether ZIP64
                   extensions may be used

    Raises RuntimeError for an unsupported compression method or mode.
    In append mode a path that cannot be opened for update is created
    instead, and a file that is not a ZIP archive is appended to.
    """
    self._allowZip64 = allowZip64
    self._didModify = False
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    # Validate the mode before touching the file system, so that a bad
    # mode is reported as RuntimeError for paths and file objects alike.
    key = mode.replace('b', '')[:1]
    if key not in ('r', 'w', 'a'):
        raise RuntimeError('Mode must be "r", "w" or "a"')

    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key
    self.pwd = None

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            # Index with the normalized key so that e.g. "rb" works too.
            self.fp = open(file, modeDict[key])
        except IOError:
            if key != 'a':
                raise
            # Nothing to append to: start a new archive instead.
            key = 'w'
            self.fp = open(file, modeDict[key])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    # key == 'w': nothing to read from a fresh archive
434
def _GetContents(self):
    """Read the directory; on a bad format, close the file if it was
    opened by this object, then let the error propagate."""
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
445
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-archive record, reads the central directory and
    creates one ZipInfo per entry, filling self.filelist and
    self.NameToInfo.  Also sets self.comment and self.start_dir.

    Raises BadZipfile when the end-of-archive record is missing or a
    central directory entry has a bad signature or is cut short.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[5]             # bytes in central directory
    offset_cd = endrec[6]   # offset of central directory
    self.comment = endrec[8]        # archive comment
    # endrec[9] is the offset of the "End of Central Dir" record
    if endrec[9] > ZIP64_LIMIT:
        # Also step over the ZIP64 record (56 bytes) and locator (20 bytes)
        inferred = endrec[9] - size_cd - 56 - 20
    else:
        inferred = endrec[9] - size_cd
    # "concat" is zero, unless zip was concatenated to another file
    concat = inferred - offset_cd
    if self.debug > 2:
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(46)
        total = total + 46
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != 46:
            # Valid signature but not enough bytes left to unpack.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        zinfo = ZipInfo(filename)
        zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        total = (total + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (zinfo.create_version, zinfo.create_system, zinfo.extract_version,
         zinfo.reserved, zinfo.flag_bits, zinfo.compress_type, t, d,
         zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
        zinfo.volume, zinfo.internal_attr, zinfo.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        zinfo._decodeExtra()
        zinfo.header_offset = zinfo.header_offset + concat
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000503
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000504
def namelist(self):
    """Return a new list with the file name of every archive member."""
    return [member.filename for member in self.filelist]
511
def infolist(self):
    """Return the list of ZipInfo instances, one per file in the archive.

    The list object held by the archive itself is returned, not a copy.
    """
    members = self.filelist
    return members
516
def printdir(self):
    """Print a table of contents for the zip file: a heading line, then
    name, modification time and size of every member."""
    row_format = "%-46s %s %12d"
    stamp_format = "%d-%02d-%02d %02d:%02d:%02d"
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
    for member in self.filelist:
        stamp = stamp_format % member.date_time
        print(row_format % (member.filename, stamp, member.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000523
524 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000525 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000526 for zinfo in self.filelist:
527 try:
Tim Peterse1190062001-01-15 03:34:38 +0000528 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000529 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000530 return zinfo.filename
531
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000532
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    The lookup is a plain mapping access, so an unknown name raises
    KeyError.
    """
    by_name = self.NameToInfo
    return by_name[name]
536
def setpassword(self, pwd):
    """Set default password for encrypted files.

    read() falls back to this value when it is called without a password.
    """
    setattr(self, "pwd", pwd)
540
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    name -- member name as known to getinfo()
    pwd  -- password for an encrypted member; when omitted the default
            set through setpassword() is used

    Raises RuntimeError when the archive is not open for "r"/"a", is
    already closed, needs a password that is missing or wrong, or when
    zlib is required but unavailable.  Raises BadZipfile on a bad local
    header, a name mismatch, an unsupported compression method or a CRC
    mismatch.  The position of the underlying file is restored before
    returning, also when an error is raised.
    """
    if self.mode not in ("r", "a"):
        raise RuntimeError('read() requires mode "r" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")
    zinfo = self.getinfo(name)
    is_encrypted = zinfo.flag_bits & 0x1
    if is_encrypted:
        if not pwd:
            pwd = self.pwd
        if not pwd:
            raise RuntimeError("File %s is encrypted, "
                  "password required for extraction" % name)
    filepos = self.fp.tell()
    try:
        self.fp.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = self.fp.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        data = self.fp.read(zinfo.compress_size)
    finally:
        # Put the file position back even when a check above fails, so
        # that later operations on the archive start where they expect.
        self.fp.seek(filepos, 0)
    # Go with decryption
    if is_encrypted:
        zd = _ZipDecrypter(pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  and is used to check the correctness of the password.
        h = [zd(c) for c in data[0:12]]
        if ord(h[11]) != ((zinfo.CRC>>24)&255):
            raise RuntimeError("Bad password for file %s" % name)
        data = "".join([zd(c) for c in data[12:]])
    # Go with decompression
    if zinfo.compress_type == ZIP_STORED:
        pass
    elif zinfo.compress_type == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "De-compression requires the (missing) zlib module")
        # zlib compress/decompress code by Jeremy Hylton of CNRI
        dc = zlib.decompressobj(-15)
        data = dc.decompress(data)
        # need to feed in unused pad byte so that zlib won't choke
        ex = dc.decompress('Z') + dc.flush()
        if ex:
            data = data + ex
    else:
        raise BadZipfile(
              "Unsupported compression method %d for file %s" %
              (zinfo.compress_type, name))
    crc = binascii.crc32(data)
    # Compare as unsigned 32-bit values: the stored CRC is unpacked as a
    # signed integer, while crc32() may return either signedness.
    if (crc & 0xffffffff) != (zinfo.CRC & 0xffffffff):
        raise BadZipfile("Bad CRC-32 for file %s" % name)
    return data
610
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError for a wrong mode, a closed archive or an unusable
    compression method, and LargeZipFile when the size or header offset
    exceeds ZIP64_LIMIT while ZIP64 is not allowed.  A duplicate name only
    produces a message, and only when self.debug is set.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        print("Duplicate name:", zinfo.filename)    # Warning only
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to write ZIP archive that was already closed")
    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
              "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
              "That compression method is not supported")
    zip64_forbidden = not self._allowZip64
    if zinfo.file_size > ZIP64_LIMIT and zip64_forbidden:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and zip64_forbidden:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000633
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename      -- path of the file to add
    arcname       -- name inside the archive; defaults to `filename`.
                     The drive and leading path separators are removed.
    compress_type -- compression method for this member; defaults to the
                     method chosen for the archive

    The local header is first written with zero CRC and sizes; once the
    data has been copied those three fields are patched in place.
    Errors found by _writecheck() are raised before anything is written.
    """
    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # Stop on an empty name instead of indexing past its end.
    while arcname and arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Close the source file even when reading or writing fails.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    # The CRC is masked and packed unsigned: this yields the same four
    # bytes for a signed value and also accepts an unsigned one.
    self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
          zinfo.compress_size, zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
698
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time and the archive's default compression.  When the entry
    has bit 0x08 set in flag_bits, the CRC and sizes are also written
    after the file data.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        # Keep only (year, month, day, hour, minute, second), the same
        # six fields write() stores, rather than the whole struct_time.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname
    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        # Negative window size: raw deflate stream, no zlib header
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC is
        # masked and packed unsigned so that both signed and unsigned
        # crc32() results produce the same four bytes.
        self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xFFFFFFFF,
                                  zinfo.compress_size, zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
731
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # close() returns immediately when self.fp is already None, so this
    # is harmless if the archive was closed explicitly beforehand.
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000735
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  When the archive
    was modified, the central directory and the end-of-archive record
    (preceded by the ZIP64 records when the directory starts beyond
    ZIP64_LIMIT) are appended first.  The underlying file is released
    and self.fp reset to None even if writing those records fails, so
    a later close() or __del__() does not append them a second time.
    The file object is only closed when this instance opened it
    (self._filePassed is false).
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()           # Start of central directory
            for zinfo in self.filelist:     # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the 32-bit fields are collected
                # here and written to a ZIP64 extra field instead.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format:  32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<hh' + 'q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(zinfo.filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()           # End of central directory
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                # The classic record carries -1 as the directory offset;
                # the real offset is in the ZIP64 record written above.
                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Always detach from the file, whether or not the records above
        # were written successfully.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
821
822
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename is the archive directory prefix under which the
        modules are stored; it is extended with the package name when
        descending into packages.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been added; do not add it again
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        An up-to-date .pyo file is preferred; otherwise the .pyc file
        is used, and it is (re)compiled first when it is missing or
        older than the .py source.  A compile error is printed and
        the .pyc path is still returned.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000920
921
def main(args=None):
    """Command-line interface: list, test, extract or create an archive.

    args -- list of command-line arguments; defaults to sys.argv[1:].

    Prints the usage text and exits with status 1 when the arguments
    do not match one of the supported invocations.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() is documented to return the name of the first
            # bad member, or None when every member checks out.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted:", badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        # Member names come from the archive and are not trusted: every
        # target is resolved and must stay inside the output directory.
        root = os.path.abspath(args[2])
        if root.endswith(os.sep):
            prefix = root
        else:
            prefix = root + os.sep

        zf = ZipFile(args[1], 'r')
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    relname = path[2:]
                else:
                    relname = path
                tgt = os.path.abspath(os.path.join(root, relname))
                if tgt == root:
                    # The entry names the target directory itself
                    continue
                if not tgt.startswith(prefix):
                    print("Skipping member outside target directory:", path)
                    continue

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write
                    if not os.path.exists(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse through a directory's entries
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
994
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()