blob: fd923c8279e03aea9ffeacc89db30d77bd3d8697 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
# zlib is optional.  It provides the deflate compression method and a CRC-32
# routine; when it cannot be imported the module-level name ``zlib`` is set
# to None and the checksum function is taken from binascii instead.
try:
    import zlib
except ImportError:
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Public names of this module.
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Raised when a file cannot be interpreted as a valid ZIP archive."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000021
22
class LargeZipFile(Exception):
    """Raised when writing a zipfile that would need the ZIP64 extensions
    while those extensions are disabled.
    """
28
# Alias for BadZipfile, the exception raised by this module; the name
# "error" is also listed in __all__.
error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Sizes above this value make ZipInfo.FileHeader() switch to the ZIP64
# extension.
ZIP64_LIMIT = 0x7FFFFFFF

# Compression methods understood by this module; no other ZIP compression
# method is supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats used to read/write the fixed-size records, each paired with
# the four-byte signature that starts the record.

# End of archive record: 8 fields, 22 bytes.
structEndArchive = "<4s4H2LH"
stringEndArchive = b"PK\x05\x06"

# Central directory entry: 19 fields, 46 bytes.
structCentralDir = "<4s4B4HLLL5HLL"
stringCentralDir = b"PK\x01\x02"

# Local file header: 12 fields, 30 bytes.
structFileHeader = "<4s2B4HLLL2H"
stringFileHeader = b"PK\x03\x04"

# Locator for the Zip64 end of archive record: 4 fields, 20 bytes.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"

# Zip64 end of archive record: 10 fields, 56 bytes.
structEndArchive64 = "<4sQHHLLQQQQ"
stringEndArchive64 = b"PK\x06\x06"
Thomas Wouters0e3f5912006-08-11 14:57:12 +000049
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
# Positions of the fields inside a tuple unpacked with structCentralDir.
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,             # is this meaningful?
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)
71
# Positions of the fields inside a tuple unpacked with structFileHeader.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,             # is this meaningful?
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
85
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    end-of-archive record is found, False otherwise -- including when the
    file cannot be opened or read (IOError).
    """
    try:
        # The with-statement closes the file even when _EndRecData() raises;
        # previously the handle leaked on that path.
        with io.open(filename, "rb") as fpin:
            endrec = _EndRecData(fpin)
    except IOError:
        return False
    return bool(endrec)             # truthy: file has correct magic number
Guido van Rossum32abe6f2000-03-31 17:30:02 +000097
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable binary file object.
    offset -- position of the classic end-of-archive record, relative to
              the end of the file (as passed to seek(..., 2)).
    endrec -- list produced by _EndRecData(); items 1 through 6 are
              overwritten in place with the ZIP64 values.

    Returns endrec.  It is returned unchanged when no ZIP64 locator or
    record is found, or when the file is too short to contain one.
    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is not large enough to hold a ZIP64 locator in front of
        # the end-of-archive record, so there is nothing to merge.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    fpin.seek(offset - locatorSize - endArchiveSize, 2)
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        # Short read: avoid a struct.error and leave endrec untouched.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
130
131
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the eight fields unpacked with
    structEndArchive, followed by the archive comment (bytes, item 8) and
    the file offset at which the record starts (item 9).  When the record
    points at ZIP64 data (item 6 is 0xffffffff) the list is completed by
    _EndRecData64().  None is returned when no valid record is found,
    including when the file is too short to contain one.
    """
    try:
        fpin.seek(-22, 2)               # Assume no archive comment.
    except IOError:
        # Fewer than 22 bytes: the file cannot hold an end-of-archive record.
        return None
    filesize = fpin.tell() + 22     # Get file size
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        # Append an empty archive comment.  It must be bytes, like the
        # comment sliced from the file in the branch below.
        endrec.append(b"")
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[6] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec

    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:     # Correct signature string was found
        endrec = list(struct.unpack(structEndArchive, data[start:start+22]))
        comment = data[start+22:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[6] == 0xffffffff:
                return _EndRecData64(fpin, start - END_BLOCK, endrec)
            return endrec
    return None     # Error, return None
169
Fred Drake484d7352000-10-02 21:14:52 +0000170
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; it is cut at the first null character and
                     os.sep is replaced by "/".
        date_time -- tuple (year, month, day, hour, min, sec).
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version would discard a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII names are returned with flag_bits unchanged; any other name
        is encoded as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 record it contains.

        A record of type 1 supplies 64-bit replacements, in this order, for
        file_size, compress_size and header_offset -- each only when the
        attribute currently holds its "see ZIP64" marker value.  Raises
        RuntimeError when the record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4 byte (type, length) header; fewer
        # than 4 trailing bytes cannot be a record and are ignored instead
        # of making struct.unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                # XXX Is this correct? won't this exclude 2**32-1 byte files?
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000310
311
class _ZipDecrypter:
    """Decrypter for members stored with the password-based ZIP encryption.

    Known plaintext attacks exist against this scheme, but being able to
    get data out of such a file is still useful.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        The cipher scrambles its internal keys with the one-byte CRC32
        primitive; a direct table-driven implementation was found to be
        faster than relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= polynomial
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in pwd.

        pwd is iterated item by item and every item is fed to
        _UpdateKeys().
        """
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plain value c."""
        mask = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xFF)) & mask
        self.key1 = (summed * 134775813 + 1) & mask
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        plain = c ^ (((k * (k ^ 1)) >> 8) & 0xFF)
        self._UpdateKeys(plain)
        return plain
369
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj so that it yields the data of one member.

        fileobj -- object whose read() supplies the stored member data.
        zipinfo -- supplies compress_type, compress_size and filename.
        decrypt -- optional callable applied to every stored byte.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''
        self.readbuffer = b''
        self.linebuffer = b''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed."""
        self.closed = True

    def _checkfornewline(self):
        """Locate the earliest line separator in the line buffer.

        Returns (position, separator length), or (-1, -1) when the buffer
        holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            # Keep the separator that occurs first in the buffer.  Taking
            # the first separator *kind* that matches anywhere would let a
            # later "\r\n" hide an earlier "\n".  On a tie the separator
            # listed first (the longer "\r\n") wins.
            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The size limit ends before the separator: hand out only
                # the first size bytes and keep the rest, separator
                # included, for the next call.  No separator is consumed,
                # so lastdiscard is cleared.
                s = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                self.lastdiscard = b''
                return s
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size bytes of member data, or everything that is
        currently available when size is None.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type == ZIP_DEFLATED:
                newdata = self.dc.decompress(newdata)
                self.rawbuffer = self.dc.unconsumed_tail
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    newdata += self.dc.flush()
                    # prevent decompressor from being used again
                    self.dc = None

            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567
568
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000569class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000570 """ Class with methods to open, read, write, close, list zip files.
571
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000573
Fred Drake3d9091e2001-03-26 15:49:24 +0000574 file: Either the path to the file, or a file-like object.
575 If it is a path, the file will be opened and closed by ZipFile.
576 mode: The mode can be either read "r", write "w" or append "a".
577 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000578 allowZip64: if True ZipFile will create files with ZIP64 extensions when
579 needed, otherwise it will raise an exception when this would
580 be necessary.
581
Fred Drake3d9091e2001-03-26 15:49:24 +0000582 """
Fred Drake484d7352000-10-02 21:14:52 +0000583
Fred Drake90eac282001-02-28 05:29:34 +0000584 fp = None # Set here since __del__ checks it
585
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (str), which is opened here, or an already open
    file-like object.  Raises RuntimeError for an unknown mode or
    compression method, or when ZIP_DEFLATED is requested without zlib.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None

    if isinstance(file, str):
        # A path was given: this object opens the file itself.
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that could not be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, modeDict[mode])
    else:
        # An open file-like object was given.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)
    elif key != 'w':
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000644
def _GetContents(self):
    """Read the directory, making sure we close the file if reading fails.

    Any exception raised by _RealGetContents() is re-raised unchanged.  A
    file that this object opened itself is closed first and self.fp is
    reset to None; a file object supplied by the caller is left open.
    """
    try:
        self._RealGetContents()
    except BaseException:
        # Not only BadZipfile: an IOError or struct.error on a damaged
        # file would otherwise leave the self-opened file handle open.
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise
655
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Sets self.comment and self.start_dir, and adds one ZipInfo per central
    directory entry to self.filelist and self.NameToInfo.  Raises
    BadZipfile when no end-of-archive record is found or when a central
    directory entry has a bad signature or is truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[5]             # bytes in central directory
    offset_cd = endrec[6]   # offset of central directory
    self.comment = endrec[8]        # archive comment
    # endrec[9] is the offset of the "End of Central Dir" record
    if endrec[9] > ZIP64_LIMIT:
        # 56 and 20 are the sizes of the Zip64 end-of-archive record and
        # of its locator (see structEndArchive64 / structEndArchive64Locator)
        inferred = endrec[9] - size_cd - 56 - 20
    else:
        inferred = endrec[9] - size_cd
    # "concat" is zero, unless zip was concatenated to another file
    concat = inferred - offset_cd
    if self.debug > 2:
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    fp = io.BytesIO(data)
    sizeCentralDir = struct.calcsize(structCentralDir)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        total = total + sizeCentralDir
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Report a cut-off entry as a bad archive instead of letting
            # struct.unpack fail with struct.error.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[_CD_FLAG_BITS]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        info = ZipInfo(filename)
        info.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        info.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        total = (total + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        info.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (info.create_version, info.create_system, info.extract_version,
         info.reserved, info.flag_bits, info.compress_type, t, d,
         info.CRC, info.compress_size, info.file_size) = centdir[1:12]
        info.volume, info.internal_attr, info.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        info._raw_time = t
        info.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        info._decodeExtra()
        info.header_offset = info.header_offset + concat
        self.filelist.append(info)
        self.NameToInfo[info.filename] = info
        if self.debug > 2:
            print("total", total)
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000720 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000721
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722
723 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000724 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000725 l = []
726 for data in self.filelist:
727 l.append(data.filename)
728 return l
729
730 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000731 """Return a list of class ZipInfo instances for files in the
732 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000733 return self.filelist
734
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000735 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000736 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000737 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
738 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000739 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000740 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000741 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
742 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743
744 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000745 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000746 for zinfo in self.filelist:
747 try:
Tim Peterse1190062001-01-15 03:34:38 +0000748 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000749 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000750 return zinfo.filename
751
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000752
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000753 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000754 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000755 info = self.NameToInfo.get(name)
756 if info is None:
757 raise KeyError(
758 'There is no item named %r in the archive' % name)
759
760 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000761
Thomas Wouterscf297e42007-02-23 15:07:44 +0000762 def setpassword(self, pwd):
763 """Set default password for encrypted files."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000764 assert isinstance(pwd, bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000765 self.pwd = pwd
766
767 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000768 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000769 return self.open(name, "r", pwd).read()
770
771 def open(self, name, mode="r", pwd=None):
772 """Return file-like object for 'name'."""
773 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000774 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000775 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000776 raise RuntimeError(
777 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000778
Guido van Rossumd8faa362007-04-27 19:54:29 +0000779 # Only open a new file for instances where we were not
780 # given a file object in the constructor
781 if self._filePassed:
782 zef_file = self.fp
783 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000784 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000785
Georg Brandlb533e262008-05-25 18:19:30 +0000786 # Make sure we have an info object
787 if isinstance(name, ZipInfo):
788 # 'name' is already an info object
789 zinfo = name
790 else:
791 # Get info object for name
792 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000793
794 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000795
796 # Skip the file header:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000797 fheader = zef_file.read(30)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000798 if fheader[0:4] != stringFileHeader:
Collin Winterce36ad82007-08-30 01:19:48 +0000799 raise BadZipfile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000800
801 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000802 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000803 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000804 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000805
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000806 if fname != zinfo.orig_filename.encode("utf-8"):
Collin Winterce36ad82007-08-30 01:19:48 +0000807 raise BadZipfile(
808 'File name in directory %r and header %r differ.'
809 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000810
Guido van Rossumd8faa362007-04-27 19:54:29 +0000811 # check for encrypted flag & handle password
812 is_encrypted = zinfo.flag_bits & 0x1
813 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000814 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000815 if not pwd:
816 pwd = self.pwd
817 if not pwd:
Collin Winterce36ad82007-08-30 01:19:48 +0000818 raise RuntimeError("File %s is encrypted, "
819 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000820
Thomas Wouterscf297e42007-02-23 15:07:44 +0000821 zd = _ZipDecrypter(pwd)
822 # The first 12 bytes in the cypher stream is an encryption header
823 # used to strengthen the algorithm. The first 11 bytes are
824 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +0000825 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +0000826 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000827 bytes = zef_file.read(12)
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000828 h = list(map(zd, bytes[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +0000829 if zinfo.flag_bits & 0x8:
830 # compare against the file type from extended local headers
831 check_byte = (zinfo._raw_time >> 8) & 0xff
832 else:
833 # compare against the CRC otherwise
834 check_byte = (zinfo.CRC >> 24) & 0xff
835 if h[11] != check_byte:
836 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000837
838 # build and return a ZipExtFile
839 if zd is None:
840 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000841 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000842 zef = ZipExtFile(zef_file, zinfo, zd)
843
844 # set universal newlines on ZipExtFile if necessary
845 if "U" in mode:
846 zef.set_univ_newlines(True)
847 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000848
Christian Heimes790c8232008-01-07 21:14:23 +0000849 def extract(self, member, path=None, pwd=None):
850 """Extract a member from the archive to the current working directory,
851 using its full name. Its file information is extracted as accurately
852 as possible. `member' may be a filename or a ZipInfo object. You can
853 specify a different directory using `path'.
854 """
855 if not isinstance(member, ZipInfo):
856 member = self.getinfo(member)
857
858 if path is None:
859 path = os.getcwd()
860
861 return self._extract_member(member, path, pwd)
862
863 def extractall(self, path=None, members=None, pwd=None):
864 """Extract all members from the archive to the current working
865 directory. `path' specifies a different directory to extract to.
866 `members' is optional and must be a subset of the list returned
867 by namelist().
868 """
869 if members is None:
870 members = self.namelist()
871
872 for zipinfo in members:
873 self.extract(zipinfo, path, pwd)
874
875 def _extract_member(self, member, targetpath, pwd):
876 """Extract the ZipInfo object 'member' to a physical
877 file on the path targetpath.
878 """
879 # build the destination pathname, replacing
880 # forward slashes to platform specific separators.
881 if targetpath[-1:] == "/":
882 targetpath = targetpath[:-1]
883
884 # don't include leading "/" from file name if present
885 if os.path.isabs(member.filename):
886 targetpath = os.path.join(targetpath, member.filename[1:])
887 else:
888 targetpath = os.path.join(targetpath, member.filename)
889
890 targetpath = os.path.normpath(targetpath)
891
892 # Create all upper directories if necessary.
893 upperdirs = os.path.dirname(targetpath)
894 if upperdirs and not os.path.exists(upperdirs):
895 os.makedirs(upperdirs)
896
Georg Brandlb533e262008-05-25 18:19:30 +0000897 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +0000898 target = open(targetpath, "wb")
899 shutil.copyfileobj(source, target)
900 source.close()
901 target.close()
902
903 return targetpath
904
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000905 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000906 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000907 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000908 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000909 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000910 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +0000911 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000912 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000913 raise RuntimeError(
914 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000915 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000916 raise RuntimeError(
917 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000918 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +0000919 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000920 if zinfo.file_size > ZIP64_LIMIT:
921 if not self._allowZip64:
922 raise LargeZipFile("Filesize would require ZIP64 extensions")
923 if zinfo.header_offset > ZIP64_LIMIT:
924 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +0000925 raise LargeZipFile(
926 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000927
928 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000929 """Put the bytes from filename into the archive under the name
930 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000931 if not self.fp:
932 raise RuntimeError(
933 "Attempt to write to ZIP archive that was already closed")
934
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000935 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000936 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000937 date_time = mtime[0:6]
938 # Create ZipInfo instance to store file information
939 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000940 arcname = filename
941 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
942 while arcname[0] in (os.sep, os.altsep):
943 arcname = arcname[1:]
944 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000945 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000946 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +0000947 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000948 else:
Tim Peterse1190062001-01-15 03:34:38 +0000949 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000950
951 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000952 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +0000953 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000954
955 self._writecheck(zinfo)
956 self._didModify = True
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000957 fp = io.open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +0000958 # Must overwrite CRC and sizes with correct data later
959 zinfo.CRC = CRC = 0
960 zinfo.compress_size = compress_size = 0
961 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000962 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000963 if zinfo.compress_type == ZIP_DEFLATED:
964 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
965 zlib.DEFLATED, -15)
966 else:
967 cmpr = None
968 while 1:
969 buf = fp.read(1024 * 8)
970 if not buf:
971 break
972 file_size = file_size + len(buf)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000973 CRC = crc32(buf, CRC) & 0xffffffff
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000974 if cmpr:
975 buf = cmpr.compress(buf)
976 compress_size = compress_size + len(buf)
977 self.fp.write(buf)
978 fp.close()
979 if cmpr:
980 buf = cmpr.flush()
981 compress_size = compress_size + len(buf)
982 self.fp.write(buf)
983 zinfo.compress_size = compress_size
984 else:
985 zinfo.compress_size = file_size
986 zinfo.CRC = CRC
987 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000988 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +0000989 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +0000990 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000991 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000992 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +0000993 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000994 self.filelist.append(zinfo)
995 self.NameToInfo[zinfo.filename] = zinfo
996
Guido van Rossum85825dc2007-08-27 17:03:28 +0000997 def writestr(self, zinfo_or_arcname, data):
998 """Write a file into the archive. The contents is 'data', which
999 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1000 it is encoded as UTF-8 first.
1001 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001002 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001003 if isinstance(data, str):
1004 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001005 if not isinstance(zinfo_or_arcname, ZipInfo):
1006 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001007 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001008 zinfo.compress_type = self.compression
1009 else:
1010 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001011
1012 if not self.fp:
1013 raise RuntimeError(
1014 "Attempt to write to ZIP archive that was already closed")
1015
Guido van Rossum85825dc2007-08-27 17:03:28 +00001016 zinfo.file_size = len(data) # Uncompressed size
1017 zinfo.header_offset = self.fp.tell() # Start of header data
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001018 self._writecheck(zinfo)
1019 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001020 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001021 if zinfo.compress_type == ZIP_DEFLATED:
1022 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1023 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001024 data = co.compress(data) + co.flush()
1025 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001026 else:
1027 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001028 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001029 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001030 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001031 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001032 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001033 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +00001034 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001035 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001036 self.filelist.append(zinfo)
1037 self.NameToInfo[zinfo.filename] = zinfo
1038
1039 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001040 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001041 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001042
1043 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001044 """Close the file, and for mode "w" and "a" write the ending
1045 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001046 if self.fp is None:
1047 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001048
1049 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001050 count = 0
1051 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001052 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001053 count = count + 1
1054 dt = zinfo.date_time
1055 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001056 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001057 extra = []
1058 if zinfo.file_size > ZIP64_LIMIT \
1059 or zinfo.compress_size > ZIP64_LIMIT:
1060 extra.append(zinfo.file_size)
1061 extra.append(zinfo.compress_size)
1062 file_size = 0xffffffff #-1
1063 compress_size = 0xffffffff #-1
1064 else:
1065 file_size = zinfo.file_size
1066 compress_size = zinfo.compress_size
1067
1068 if zinfo.header_offset > ZIP64_LIMIT:
1069 extra.append(zinfo.header_offset)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001070 header_offset = 0xffffffff # -1 32 bit
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 else:
1072 header_offset = zinfo.header_offset
1073
1074 extra_data = zinfo.extra
1075 if extra:
1076 # Append a ZIP64 field to the extra's
1077 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001078 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001079 1, 8*len(extra), *extra) + extra_data
1080
1081 extract_version = max(45, zinfo.extract_version)
1082 create_version = max(45, zinfo.create_version)
1083 else:
1084 extract_version = zinfo.extract_version
1085 create_version = zinfo.create_version
1086
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001087 filename, flag_bits = zinfo._encodeFilenameFlags()
Amaury Forgeot d'Arc2f9d4d12008-03-20 00:35:03 +00001088 centdir = struct.pack(structCentralDir,
1089 stringCentralDir, create_version,
1090 zinfo.create_system, extract_version, zinfo.reserved,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001091 flag_bits, zinfo.compress_type, dostime, dosdate,
Amaury Forgeot d'Arc2f9d4d12008-03-20 00:35:03 +00001092 zinfo.CRC, compress_size, file_size,
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001093 len(filename), len(extra_data), len(zinfo.comment),
Amaury Forgeot d'Arc2f9d4d12008-03-20 00:35:03 +00001094 0, zinfo.internal_attr, zinfo.external_attr,
1095 header_offset)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001096 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001097 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001098 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001099 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001100
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001101 pos2 = self.fp.tell()
1102 # Write end-of-zip-archive record
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001103 if pos1 > ZIP64_LIMIT:
1104 # Need to write the ZIP64 end-of-archive records
1105 zip64endrec = struct.pack(
1106 structEndArchive64, stringEndArchive64,
1107 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1108 self.fp.write(zip64endrec)
1109
1110 zip64locrec = struct.pack(
1111 structEndArchive64Locator,
1112 stringEndArchive64Locator, 0, pos2, 1)
1113 self.fp.write(zip64locrec)
1114
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001115 endrec = struct.pack(structEndArchive, stringEndArchive,
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001116 0, 0, count, count, pos2 - pos1, 0xffffffff, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001117 self.fp.write(endrec)
1118
1119 else:
1120 endrec = struct.pack(structEndArchive, stringEndArchive,
1121 0, 0, count, count, pos2 - pos1, pos1, 0)
1122 self.fp.write(endrec)
Guido van Rossumf85af612001-04-14 16:45:14 +00001123 self.fp.flush()
Fred Drake3d9091e2001-03-26 15:49:24 +00001124 if not self._filePassed:
1125 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001126 self.fp = None
1127
1128
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving compiled Python modules and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        A directory containing __init__.py is treated as a package: its
        modules and its sub-packages (recursively) are stored below
        "basename/<package name>".  Any other directory contributes only
        the *.py files directly inside it, stored below "basename".  If
        pathname is not a directory it must name a *.py file, otherwise
        RuntimeError is raised.  What is stored is always the compiled
        module (.pyo or .pyc), compiled on demand by _get_codename().
        """
        name = os.path.split(pathname)[1]

        def add_module(module_path, prefix, label):
            # Resolve the compiled file for one module and store it.
            fname, arcname = self._get_codename(module_path, prefix)
            if self.debug:
                print(label, arcname)
            self.write(fname, arcname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] == ".py":
                    add_module(path[0:-3], basename, "Adding")
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        add_module(initname[0:-3], basename, "Adding")
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path[0:-3], basename, "Adding")

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without extension.  The .pyo file is
        chosen when it exists and is at least as new as the .py source;
        otherwise the .pyc file is chosen, after compiling it if it is
        missing or older than the source.  A compile error is printed, not
        raised.  The archive name is the chosen file's base name, prefixed
        with "basename/" when basename is non-empty.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        def mtime(path):
            return os.stat(path).st_mtime

        if os.path.isfile(file_pyo) and mtime(file_pyo) >= mtime(file_py):
            fname = file_pyo            # Use .pyo file
        else:
            fname = file_pyc
            if not os.path.isfile(file_pyc) or \
                    mtime(file_pyc) < mtime(file_py):
                import py_compile
                if self.debug:
                    print("Compiling", file_py)
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001226
1227
def main(args = None):
    """Command-line entry point: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  Invalid usage prints the usage text and exits with
    status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            # Report the failure instead of silently dropping the result.
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: already created above, and it cannot
                    # be opened as a file.
                    continue
                with io.open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else is
            # ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()