# Source blob: 576eaf97d03322b4e6decc5a97d31cccd70a05c7
"Read and write ZIP files."

import binascii
import os
import struct
import time

try:
    import zlib  # We may need its compression method
except ImportError:
    # Without zlib only ZIP_STORED members can be read or written
    zlib = None

# Public names exported by "from <this module> import *"
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile"]

class BadZipfile(Exception):
    """Raised when a file is not a usable ZIP archive.

    Used by this module for a missing end-of-archive record, bad magic
    numbers, unsupported compression methods and CRC-32 mismatches.
    """


error = BadZipfile  # The exception raised by this module

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4H3l5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4H3l2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header

# indexes of entries in the central directory structure
# (positions in the tuple unpacked with structCentralDir)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4          # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple unpacked with structFileHeader)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2          # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record is found by _EndRecData(), and
    False otherwise, including when the file cannot be opened or read
    (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when reading the record fails
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False

def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable file object opened for binary reading.

    The data is a list of the eight items in the ZIP "End of central dir"
    record followed by a ninth item, the archive comment, and a tenth
    item, the file seek offset of this record.  None is returned when no
    consistent record can be found, which includes files too short to
    hold one."""
    fpin.seek(0, 2)
    filesize = fpin.tell()          # Get file size
    if filesize < 22:
        # Too small to hold the fixed 22 byte record; seeking to -22
        # from the end would fail
        return None
    fpin.seek(-22, 2)               # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        return endrec
    # The comment is appended to the ZIP file and has a 16 bit length,
    # so it may be up to 64K long.  Search the whole region in which
    # the record can start for the record signature; the signature must
    # not appear in the comment.
    search_start = max(filesize - 65535 - 22, 0)
    fpin.seek(search_start, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:                  # Correct signature string was found
        record = data[start:start+22]
        if len(record) == 22:       # Ignore a signature in a truncated tail
            endrec = list(struct.unpack(structEndArchive, record))
            comment = data[start+22:]
            if endrec[7] == len(comment):   # Comment length checks out
                # Append the archive comment and start offset
                endrec.append(comment)
                endrec.append(search_start + start)
                return endrec
    return None                     # Error, return None


class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is kept unchanged in
        orig_filename and stored normalized in filename.  date_time is a
        (year, month, day, hour, min, sec) sequence.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.  The name is
        # truncated silently: nothing is written to stdout.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]

        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/":
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30 byte local file header followed by
        the file name and the extra data.  Unless flag bit 0x08 is set,
        the CRC, compress_size and file_size attributes must already be
        present on the instance.
        """
        dt = self.date_time
        # Pack the date and time into the 16 bit fields of the header;
        # seconds are stored halved.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(self.extra))
        return header + self.filename + self.extra


class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first character of mode is significant.  RuntimeError is
        raised for an unsupported compression method, for ZIP_DEFLATED
        without zlib, and for any other mode.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        # Reject a bad mode before any file is opened, so that nothing
        # has to be closed again and the caller gets the documented
        # RuntimeError rather than a KeyError from the mode table.
        key = mode[:1]
        if key not in ("r", "w", "a"):
            raise RuntimeError('Mode must be "r", "w" or "a"')

        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            try:                        # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:          # file is not a zip file, just append
                self.fp.seek(0, 2)
        # Nothing has to be read for mode 'w'

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo from the central
        directory and sets self.comment and self.start_dir.  Raises
        BadZipfile when the end record or a magic number is wrong, and
        RuntimeError when a directory entry and its local header
        disagree about the file name.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]           # offset of central directory
        self.comment = endrec[8]        # archive comment
        # endrec[9] is the offset of the "End of Central Dir" record
        inferred_cd = endrec[9] - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.orig_filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.orig_filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member that cannot be read, or
        None when every member reads cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except Exception:
                # Any read failure marks the member as bad.  A bare
                # "except:" is avoided so that interpreter-level
                # exceptions are not reported as a corrupt member.
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member called name.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is closed, was not opened
        with mode "r" or "a", or needs the missing zlib module, and
        BadZipfile for an unsupported compression method or a CRC-32
        mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        # Restore the file position afterwards, so that a pending
        # append is not disturbed by the read
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename, and compress_type to the
        compression chosen for the archive.
        """
        st = os.stat(filename)
        date_time = time.localtime(st.st_mtime)[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        # Unix attributes.  Only the low 16 bits of the mode fit in the
        # upper half of the 32 bit field.
        zinfo.external_attr = long(st[0] & 0xFFFF) << 16
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x00
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Do not leak the source file when reading or writing fails
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if not isinstance(zinfo_or_arcname, ZipInfo):
            # Keep only (year, month, day, hour, min, sec): date_time is
            # formatted with exactly six fields by printdir()
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
        else:
            zinfo = zinfo_or_arcname
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records."""
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):             # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    centdir = struct.pack(structCentralDir,
                      stringCentralDir, zinfo.create_version,
                      zinfo.create_system, zinfo.extract_version,
                      zinfo.reserved, zinfo.flag_bits, zinfo.compress_type,
                      dostime, dosdate, zinfo.CRC, zinfo.compress_size,
                      zinfo.file_size, len(zinfo.filename), len(zinfo.extra),
                      len(zinfo.comment), 0, zinfo.internal_attr,
                      zinfo.external_attr, zinfo.header_offset)
                    self.fp.write(centdir)
                    self.fp.write(zinfo.filename)
                    self.fp.write(zinfo.extra)
                    self.fp.write(zinfo.comment)
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
                self.fp.flush()
        finally:
            # Always drop the handle, even when writing the directory
            # failed, so that __del__ does not try to write it again
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()


class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename, when given, is the archive directory under which the
        modules are stored.  RuntimeError is raised when pathname is a
        file whose name does not end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                self._write_module(initname[0:-3], basename)
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        self._write_module(path[0:-3], basename)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        self._write_module(path[0:-3], basename)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")

    def _write_module(self, pathname, basename, label="Adding"):
        """Archive the compiled module for pathname (given without ".py").

        label is the prefix of the message printed when self.debug is set.
        """
        fname, arcname = self._get_codename(pathname, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with "basename/" when basename is
        not empty.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            # The .pyc file is missing or older than the source
            import py_compile
            import sys
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError:
                # Report the problem and carry on with the .pyc name.
                # sys.exc_info() is used to reach the exception without
                # version specific "except" syntax.
                print(sys.exc_info()[1].msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
616 return (fname, archivename)