blob: 73b62afcbf828c35f8ed6cafcc0d8cb281397dfb [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Public names exported by "from zipfile import *".
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Raised by this module when an archive cannot be read as a ZIP file."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# The exception raised by this module (alias of BadZipfile)
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Sizes above this value make ZipInfo.FileHeader() switch to the ZIP64
# extension; it is the largest signed 32-bit integer.
ZIP64_LIMIT = 0x7FFFFFFF

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers, each paired
# with the 4-byte magic number that introduces the record.

# end of archive record: 8 items, 22 bytes
structEndArchive = "<4s4H2LH"
stringEndArchive = b"PK\005\006"

# central directory entry: 19 items, 46 bytes
structCentralDir = "<4s4B4HLLL5HLL"
stringCentralDir = b"PK\001\002"

# local file header record: 12 items, 30 bytes
structFileHeader = "<4s2B4HLLL2H"
stringFileHeader = b"PK\003\004"

# Zip64 end of archive locator: 4 items, 20 bytes
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"

# Zip64 end of archive record: 10 items, 56 bytes
structEndArchive64 = "<4sQHHLLQQQQ"
stringEndArchive64 = b"PK\x06\x06"
Thomas Wouters0e3f5912006-08-11 14:57:12 +000049
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
# indexes of entries in the central directory structure
# (positions in the tuple unpacked with structCentralDir)
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,             # is this meaningful?
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# indexes of entries in the local file header structure
# (positions in the tuple unpacked with structFileHeader)
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,             # is this meaningful?
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
85
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when _EndRecData() finds an end-of-archive record in the
    file, and False otherwise, including when the file cannot be opened
    or read (IOError).
    """
    try:
        fpin = io.open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when probing the file raises.
            fpin.close()
    except IOError:
        return False
    # A non-empty record means the file has the correct magic number.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +000097
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable binary file object positioned anywhere
    offset -- position of the classic end-of-archive record, relative to
              the end of the file (zero or negative)
    endrec -- list built by _EndRecData(); items 1..6 are overwritten in
              place with the ZIP64 values and the list is returned

    endrec is returned unchanged when the ZIP64 locator or the ZIP64
    end-of-archive record is missing or cannot be read in full.
    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is too small to hold a locator before the classic
        # record, so there is nothing to merge.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
130
131
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items in the ZIP "End of central dir"
    record followed by the archive comment (bytes) and, as the last item,
    the file seek offset of this record.  None is returned when no valid
    record is found, including for files shorter than the 22-byte record.
    """
    # Measure the file first: seeking 22 bytes back from the end of a
    # shorter file either fails or is silently clamped, depending on the
    # file object.
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < 22:
        return None

    fpin.seek(-22, 2)               # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append(b"")              # Append the (empty) archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec

    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:     # Correct signature string was found
        record = data[start:start+22]
        if len(record) != 22:
            # The signature sits too close to the end of the file to be
            # followed by a complete record.
            return None
        endrec = list(struct.unpack(structEndArchive, record))
        comment = data[start+22:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None      # Error, no usable record
169
Fred Drake484d7352000-10-02 21:14:52 +0000170
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename with the given modification time.

        filename  -- member name; cut at the first null character and
                     normalized to use "/" as the directory separator
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the
        encoded file name and the extra field.  When a size exceeds
        ZIP64_LIMIT a ZIP64 extra record is appended and extract_version
        and create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            # Raise each version field from its own previous value.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII names keep flag_bits unchanged; any other name is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 sizes it carries.

        Walks the (type, length, payload) records in self.extra.  For a
        type 1 record, file_size, compress_size and header_offset are
        replaced, in that order, when they hold the 32-bit placeholder
        value.  Raises RuntimeError for a type 1 record whose length is
        not 0, 8, 16 or at least 24.
        """
        extra = self.extra
        unpack = struct.unpack
        # Each record needs a complete 4-byte header; stop on a shorter
        # tail instead of failing to unpack it.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                # XXX Is this correct? won't this exclude 2**32-1 byte files?
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000310
311
class _ZipDecrypter:
    """Decrypt data stored with the password-based ZIP encryption.

    Known plaintext attacks exist against this scheme, but it is still
    useful to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling
        some internal keys.  We noticed that a direct implementation is
        faster than relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            crc = value
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        shifted = (crc >> 8) & 0xffffff
        slot = (crc ^ ch) & 0xff
        return shifted ^ self.crctable[slot]

    def __init__(self, pwd):
        # Fixed starting keys, then mixed with every password item.
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Fold one plaintext byte value into the three keys."""
        mask = 0xFFFFFFFF
        key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (key0 & 0xFF)) & mask
        key1 = (key1 * 0x08088405 + 1) & mask
        self.key0 = key0
        self.key1 = key1
        self.key2 = self._crc32((key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        mixed = self.key2 | 2
        keystream = ((mixed * (mixed ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        self._UpdateKeys(plain)
        return plain
369
class ZipExtFile:
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj for reading the member described by zipinfo.

        fileobj -- file object from which the member's raw data is read
        zipinfo -- object providing compress_type, compress_size, filename
        decrypt -- optional callable mapping one encrypted byte value to
                   its plain value; None when the data is not encrypted
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''
        self.readbuffer = b''
        self.linebuffer = b''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed; the underlying file is not touched."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of the first separator in linebuffer.

        Separators are tried in the order given by self.nlSeps.
        Returns (-1, -1) when the buffer is empty or holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
        read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size bytes of member data; everything buffered so
        far when size is None.

        Raw bytes are pulled from the underlying file object, passed
        through the decrypter when one was given, inflated for
        ZIP_DEFLATED members, and collected in self.readbuffer.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = b''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = bytes(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    # NOTE(review): nothing in this class sets self.eof to
                    # True, so this flush looks unreachable from here --
                    # confirm whether a caller sets it.
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567
568
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000569class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000570 """ Class with methods to open, read, write, close, list zip files.
571
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000573
Fred Drake3d9091e2001-03-26 15:49:24 +0000574 file: Either the path to the file, or a file-like object.
575 If it is a path, the file will be opened and closed by ZipFile.
576 mode: The mode can be either read "r", write "w" or append "a".
577 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000578 allowZip64: if True ZipFile will create files with ZIP64 extensions when
579 needed, otherwise it will raise an exception when this would
580 be necessary.
581
Fred Drake3d9091e2001-03-26 15:49:24 +0000582 """
Fred Drake484d7352000-10-02 21:14:52 +0000583
Fred Drake90eac282001-02-28 05:29:34 +0000584 fp = None # Set here since __del__ checks it
585
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000586 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000587 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000588 if mode not in ("r", "w", "a"):
589 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
590
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000591 if compression == ZIP_STORED:
592 pass
593 elif compression == ZIP_DEFLATED:
594 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000595 raise RuntimeError(
596 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000597 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000598 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000599
600 self._allowZip64 = allowZip64
601 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000602 self.debug = 0 # Level of printing: 0 through 3
603 self.NameToInfo = {} # Find file info given name
604 self.filelist = [] # List of ZipInfo instances for archive
605 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000606 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000607 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000608
Fred Drake3d9091e2001-03-26 15:49:24 +0000609 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000610 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000611 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000612 self._filePassed = 0
613 self.filename = file
614 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000615 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000616 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000617 except IOError:
618 if mode == 'a':
619 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000620 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000621 else:
622 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000623 else:
624 self._filePassed = 1
625 self.fp = file
626 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000627
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000628 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000629 self._GetContents()
630 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000631 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000632 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000633 try: # See if file is a zip file
634 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000635 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000636 self.fp.seek(self.start_dir, 0)
637 except BadZipfile: # file is not a zip file, just append
638 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000639 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000640 if not self._filePassed:
641 self.fp.close()
642 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000643 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000644
645 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000646 """Read the directory, making sure we close the file if the format
647 is bad."""
648 try:
649 self._RealGetContents()
650 except BadZipfile:
651 if not self._filePassed:
652 self.fp.close()
653 self.fp = None
654 raise
655
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, reads the whole central
    directory into memory and creates one ZipInfo per entry, appending it
    to self.filelist and registering it in self.NameToInfo.  Also sets
    self.comment and self.start_dir.

    Raises BadZipfile if no end record is found, if an entry does not
    start with the central directory signature, or if an entry header is
    truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[5]             # bytes in central directory
    offset_cd = endrec[6]           # offset of central directory
    self.comment = endrec[8]        # archive comment
    # endrec[9] is the offset of the "End of Central Dir" record
    if endrec[9] > ZIP64_LIMIT:
        # NOTE(review): 56 and 20 look like the sizes of the ZIP64 end
        # record and its locator, which sit between the directory and the
        # classic end record -- confirm against the struct definitions.
        inferred = endrec[9] - size_cd - 56 - 20
    else:
        inferred = endrec[9] - size_cd
    # "concat" is zero, unless zip was concatenated to another file
    concat = inferred - offset_cd
    if self.debug > 2:
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(46)
        total = total + 46
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != 46:
            # A short read would otherwise surface as struct.error, which
            # callers that clean up on BadZipfile do not catch.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        zinfo = ZipInfo(filename)
        zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        total = (total + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (zinfo.create_version, zinfo.create_system, zinfo.extract_version,
         zinfo.reserved, zinfo.flag_bits, zinfo.compress_type, t, d,
         zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
        zinfo.volume, zinfo.internal_attr, zinfo.external_attr = \
            centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        zinfo._raw_time = t
        zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        zinfo._decodeExtra()
        # Shift the offset by the size of any data prepended to the archive.
        zinfo.header_offset = zinfo.header_offset + concat
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000721
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722
def namelist(self):
    """Return a new list holding the filename of every archive member,
    in the order the members appear in self.filelist."""
    return [entry.filename for entry in self.filelist]
729
def infolist(self):
    """Return the list of ZipInfo instances describing the archive members.

    This is the archive's own list object (self.filelist), not a copy.
    """
    entries = self.filelist
    return entries
734
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One heading line is written, followed by one line per member giving
    its name, modification time and uncompressed size.  Output goes to
    'file', or to print()'s default stream when 'file' is None.
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743
744 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000745 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000746 for zinfo in self.filelist:
747 try:
Tim Peterse1190062001-01-15 03:34:38 +0000748 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000749 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000750 return zinfo.filename
751
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000752
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000761
def setpassword(self, pwd):
    """Set default password for encrypted files.

    'pwd' must be a bytes object, or None to clear the default password.
    Raises TypeError for any other type.
    """
    # An explicit check instead of 'assert', which is stripped when Python
    # runs with -O and would let a str password through silently.
    if pwd is not None and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
766
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    'pwd' is passed on to open() for encrypted members.
    """
    member = self.open(name, "r", pwd)
    return member.read()
770
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'mode' must be "r", "U" or "rU"; a mode containing "U" enables
    universal newlines on the returned object.  'pwd' is the password for
    an encrypted member and falls back to the archive default (self.pwd).

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password; KeyError if 'name' is not in the
    archive; BadZipfile if the member's local header is invalid or its
    name disagrees with the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Look the member up before touching the file so an unknown name
    # cannot leave a freshly opened handle behind.
    zinfo = self.getinfo(name)

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # The header name is raw bytes.  Accept it when it matches the
        # directory name encoded as UTF-8 or, for entries without the
        # UTF-8 flag (0x800), when decoding it with the historical cp437
        # encoding gives the directory name.
        name_matches = fname == zinfo.orig_filename.encode("utf-8")
        if not name_matches and not zinfo.flag_bits & 0x800:
            name_matches = fname.decode("cp437") == zinfo.orig_filename
        if not name_matches:
            raise BadZipfile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = list(map(zd, enc_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except BaseException:
        # Do not leak the handle we opened ourselves; a caller-supplied
        # file object is left untouched.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000845
def extract(self, member, path=None, pwd=None):
    """Extract one member of the archive and return the path it was
    written to.

    `member' may be a filename or a ZipInfo object; a filename is resolved
    with getinfo().  The member is extracted under `path', which defaults
    to the current working directory.  `pwd' is handed on unchanged to
    _extract_member().
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
859
def extractall(self, path=None, members=None, pwd=None):
    """Extract every requested member by calling extract() on each.

    `members' defaults to everything returned by namelist(); when given
    it must be a subset of that list.  `path' and `pwd' are passed to
    extract() unchanged, so `path' of None means the current working
    directory.
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
871
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    The member name is sanitised before use: any drive prefix, leading
    separators and "." / ".." components are dropped, so an entry such as
    "../x" or "/abs/x" is always written below 'targetpath'.  An entry
    whose name ends in "/" is created as a directory.  Returns the path
    that was written.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace("/", os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Archive contents are untrusted input: keep only plain name
    # components so the result cannot escape the target directory.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe_parts = ("", os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in unsafe_parts)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    if member.filename[-1:] == "/":
        # Directory entry: make the directory rather than an empty file
        # that would block the extraction of its children.
        if not os.path.isdir(targetpath):
            os.makedirs(targetpath)
        return targetpath

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    source = self.open(member.filename, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        # Close the member even if creating or writing the target failed.
        source.close()

    return targetpath
901
def _writecheck(self, zinfo):
    """Validate 'zinfo' and the archive state before a member is written.

    A duplicate name only produces a warning, and only when self.debug is
    set.  Raises RuntimeError for a read-only or closed archive, for
    deflate without zlib, or for an unsupported compression method, and
    LargeZipFile when ZIP64 would be needed but is not allowed.
    """
    is_duplicate = zinfo.filename in self.NameToInfo
    if is_duplicate and self.debug:      # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000924
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; its drive prefix and leading path
    separators are removed.  'compress_type' defaults to the archive's
    compression setting.  The local header is first written with zeroed
    CRC and sizes and patched once the data has been streamed.

    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = io.open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Release the source file even when compressing or writing fails.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
993
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time and the archive's compression setting.  Raises RuntimeError if
    the archive is already closed, plus whatever _writecheck() raises.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
             zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC was
        # masked to an unsigned 32-bit value above, so it must be packed
        # as "L"; a signed "l" rejects values >= 2**31.
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1035
def __del__(self):
    """Finalizer: call close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001039
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  When the archive was
    modified, the central directory and the end-of-archive record (with
    the ZIP64 records when the directory starts beyond ZIP64_LIMIT) are
    written first.  The file is released and self.fp reset to None even
    if writing those records fails; a file object supplied by the caller
    is not closed.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the 32-bit fields go into a ZIP64
                # extra field; the fixed field then holds 0xffffffff.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff  # -1 32 bit
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                            0, 0, count, count, pos2 - pos1, 0xffffffff, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Always drop the handle so a failed write cannot leave the
        # archive half-open (and retried again from __del__).
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1124
1125
class PyZipFile(ZipFile):
    """ZipFile variant that adds compiled Python modules and packages."""

    def writepy(self, pathname, basename = ""):
        """Add the compiled form of the Python code at "pathname".

        Three cases are handled:

        * a package directory (one containing __init__.py): the package's
          modules and, recursively, its sub-packages are added below
          "basename/<directory name>";
        * any other directory: every *.py file directly inside it is
          added under "basename";
        * anything else must be a path ending in ".py" and is added as a
          single module, otherwise RuntimeError is raised.

        What gets stored is always the .pyo or .pyc file chosen (and, if
        needed, compiled) by _get_codename().
        """
        _parent, leaf = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                full = os.path.join(pathname, entry)
                if os.path.splitext(entry)[1] == ".py":
                    fname, arcname = self._get_codename(full[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, leaf) if basename else leaf
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        # __init__ has just been written; do not add it a second time.
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for entry in entries:
            full = os.path.join(pathname, entry)
            if os.path.isdir(full):
                if os.path.isfile(os.path.join(full, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(full, basename)  # Recursive call
            elif os.path.splitext(entry)[1] == ".py":
                fname, arcname = self._get_codename(full[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        "pathname" is a module path without its extension.  An existing
        .pyo at least as new as the .py source is preferred; otherwise
        the .pyc is used, after compiling it when it is missing or older
        than the source.  For example, given /python/lib/string, return
        (/python/lib/string.pyc, string).  "basename", when non-empty, is
        prepended to the archive name with a "/".
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        def mtime(path):
            return os.stat(path).st_mtime

        if os.path.isfile(optimized) and mtime(optimized) >= mtime(source):
            fname = optimized       # Use .pyo file
        else:
            if not os.path.isfile(compiled) or \
                    mtime(compiled) < mtime(source):
                import py_compile
                if self.debug:
                    print("Compiling", source)
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    # A compile error is only reported; the .pyc path is
                    # still returned below.
                    print(err.msg)
            fname = compiled
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001223
1224
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list and defaults to sys.argv[1:].  The first
    argument selects the action (-l, -t, -e or -c).  On a missing or
    unknown action, or a wrong number of arguments, the usage text is
    printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the name of the first corrupt member; report
        # it instead of silently claiming success.
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = io.open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (missing paths, special files) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1297
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()