blob: 366cee9a4f73d04b9a3ddf71a924a2a52a75f77e [file] [log] [blame]
Fred Drake3c9f9362000-03-31 17:51:10 +00001\section{\module{zipfile} ---
2 Work with ZIP archives}
3
Fred Drake16753752000-09-18 16:21:11 +00004\declaremodule{standard}{zipfile}
Fred Drake3c9f9362000-03-31 17:51:10 +00005\modulesynopsis{Read and write ZIP-format archive files.}
6\moduleauthor{James C. Ahlstrom}{jim@interet.com}
7\sectionauthor{James C. Ahlstrom}{jim@interet.com}
8% LaTeX markup by Fred L. Drake, Jr. <fdrake@acm.org>
9
Fred Drake6300bd42000-09-07 14:01:40 +000010\versionadded{1.6}
11
Fred Drake3c9f9362000-03-31 17:51:10 +000012The ZIP file format is a common archive and compression standard.
13This module provides tools to create, read, write, append, and list a
Fred Drake42780242000-10-02 20:56:30 +000014ZIP file. Any advanced use of this module will require an
15understanding of the format, as defined in
Thomas Wouters0e3f5912006-08-11 14:57:12 +000016\citetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
17{PKZIP Application Note}.
Fred Drake42780242000-10-02 20:56:30 +000018
19This module does not currently handle ZIP files which have appended
Thomas Wouterscf297e42007-02-23 15:07:44 +000020comments, or multi-disk ZIP files. It can handle ZIP files that use
the ZIP64 extensions (that is, ZIP files that are more than 4 GByte in
22size). It supports decryption of encrypted files in ZIP archives, but
23it cannot currently create an encrypted file.
Fred Drake3c9f9362000-03-31 17:51:10 +000024
25The available attributes of this module are:
26
Guido van Rossumb5a755e2007-07-18 18:15:48 +000027\begin{excdesc}{BadZipfile}
28 The error raised for bad ZIP files (old name: \code{zipfile.error}).
Fred Drake3c9f9362000-03-31 17:51:10 +000029\end{excdesc}
30
Thomas Wouters0e3f5912006-08-11 14:57:12 +000031\begin{excdesc}{LargeZipFile}
  The error raised when a ZIP file would require ZIP64 functionality but
  that functionality has not been enabled.
34\end{excdesc}
35
Fred Drake96d7a702001-05-11 01:08:13 +000036\begin{classdesc*}{ZipFile}
Fred Drake3c9f9362000-03-31 17:51:10 +000037 The class for reading and writing ZIP files. See
38 ``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for
39 constructor details.
Fred Drake886f1132001-05-11 15:49:19 +000040\end{classdesc*}
Fred Drake3c9f9362000-03-31 17:51:10 +000041
Fred Drake96d7a702001-05-11 01:08:13 +000042\begin{classdesc*}{PyZipFile}
Fred Drake42780242000-10-02 20:56:30 +000043 Class for creating ZIP archives containing Python libraries.
Fred Drake886f1132001-05-11 15:49:19 +000044\end{classdesc*}
Fred Drake42780242000-10-02 20:56:30 +000045
46\begin{classdesc}{ZipInfo}{\optional{filename\optional{, date_time}}}
Raymond Hettinger68804312005-01-01 00:28:46 +000047 Class used to represent information about a member of an archive.
Fred Drake42780242000-10-02 20:56:30 +000048 Instances of this class are returned by the \method{getinfo()} and
Fred Drakee35360f2000-10-03 15:16:31 +000049 \method{infolist()} methods of \class{ZipFile} objects. Most users
Fred Drake42780242000-10-02 20:56:30 +000050 of the \module{zipfile} module will not need to create these, but
51 only use those created by this module.
52 \var{filename} should be the full name of the archive member, and
53 \var{date_time} should be a tuple containing six fields which
54 describe the time of the last modification to the file; the fields
55 are described in section \ref{zipinfo-objects}, ``ZipInfo Objects.''
56\end{classdesc}
57
Fred Drake5d63a392000-10-06 15:29:56 +000058\begin{funcdesc}{is_zipfile}{filename}
Neal Norwitz6b353702002-04-09 18:15:00 +000059 Returns \code{True} if \var{filename} is a valid ZIP file based on its magic
60 number, otherwise returns \code{False}. This module does not currently
Fred Drake3c9f9362000-03-31 17:51:10 +000061 handle ZIP files which have appended comments.
62\end{funcdesc}
63
Fred Drake3c9f9362000-03-31 17:51:10 +000064\begin{datadesc}{ZIP_STORED}
Fred Drake5d63a392000-10-06 15:29:56 +000065 The numeric constant for an uncompressed archive member.
Fred Drake3c9f9362000-03-31 17:51:10 +000066\end{datadesc}
67
68\begin{datadesc}{ZIP_DEFLATED}
69 The numeric constant for the usual ZIP compression method. This
  requires the \refmodule{zlib} module. No other compression methods are
71 currently supported.
72\end{datadesc}
73
74
75\begin{seealso}
Thomas Wouters0e3f5912006-08-11 14:57:12 +000076 \seetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
77 {PKZIP Application Note}{Documentation on the ZIP file format by
78 Phil Katz, the creator of the format and algorithms used.}
Fred Drake58295de2000-09-30 00:11:45 +000079
80 \seetitle[http://www.info-zip.org/pub/infozip/]{Info-ZIP Home Page}{
81 Information about the Info-ZIP project's ZIP archive
82 programs and development libraries.}
Fred Drake3c9f9362000-03-31 17:51:10 +000083\end{seealso}
84
85
86\subsection{ZipFile Objects \label{zipfile-objects}}
87
Thomas Wouters0e3f5912006-08-11 14:57:12 +000088\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression\optional{, allowZip64}}}}
Fred Drakebda3a592001-05-09 19:57:37 +000089 Open a ZIP file, where \var{file} can be either a path to a file
Fred Drake907e76b2001-07-06 20:30:11 +000090 (a string) or a file-like object. The \var{mode} parameter
Fred Drake3c9f9362000-03-31 17:51:10 +000091 should be \code{'r'} to read an existing file, \code{'w'} to
92 truncate and write a new file, or \code{'a'} to append to an
Guido van Rossumb5a755e2007-07-18 18:15:48 +000093 existing file. If \var{mode} is \code{'a'} and \var{file}
Fred Drake3c9f9362000-03-31 17:51:10 +000094 refers to an existing ZIP file, then additional files are added to
Fred Drakebda3a592001-05-09 19:57:37 +000095 it. If \var{file} does not refer to a ZIP file, then a new ZIP
Fred Drake3c9f9362000-03-31 17:51:10 +000096 archive is appended to the file. This is meant for adding a ZIP
97 archive to another file, such as \file{python.exe}. Using
Fred Drake42780242000-10-02 20:56:30 +000098
Fred Drake3c9f9362000-03-31 17:51:10 +000099\begin{verbatim}
100cat myzip.zip >> python.exe
101\end{verbatim}
Fred Drake42780242000-10-02 20:56:30 +0000102
Fred Drake3c9f9362000-03-31 17:51:10 +0000103 also works, and at least \program{WinZip} can read such files.
  If \var{mode} is \code{'a'} and the file does not exist at all,
105 it is created.
Fred Drake3c9f9362000-03-31 17:51:10 +0000106 \var{compression} is the ZIP compression method to use when writing
107 the archive, and should be \constant{ZIP_STORED} or
108 \constant{ZIP_DEFLATED}; unrecognized values will cause
Fred Drakee35360f2000-10-03 15:16:31 +0000109 \exception{RuntimeError} to be raised. If \constant{ZIP_DEFLATED}
Thomas Heller3d62f8c2002-01-14 08:37:39 +0000110 is specified but the \refmodule{zlib} module is not available,
Fred Drakee35360f2000-10-03 15:16:31 +0000111 \exception{RuntimeError} is also raised. The default is
Fred Drake3c9f9362000-03-31 17:51:10 +0000112 \constant{ZIP_STORED}.
  If \var{allowZip64} is \code{True}, \module{zipfile} will create ZIP files
  that use the ZIP64 extensions when the ZIP file is larger than 2 GB. If it is
  false (the default), \module{zipfile} will raise an exception when the
116 ZIP file would require ZIP64 extensions. ZIP64 extensions are disabled by
117 default because the default \program{zip} and \program{unzip} commands on
118 \UNIX{} (the InfoZIP utilities) don't support these extensions.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000119
120 \versionchanged[If the file does not exist, it is created if the
121 mode is 'a']{2.6}
Fred Drake3c9f9362000-03-31 17:51:10 +0000122\end{classdesc}
123
Fred Drakee35360f2000-10-03 15:16:31 +0000124\begin{methoddesc}{close}{}
125 Close the archive file. You must call \method{close()} before
126 exiting your program or essential records will not be written.
127\end{methoddesc}
128
129\begin{methoddesc}{getinfo}{name}
130 Return a \class{ZipInfo} object with information about the archive
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000131 member \var{name}. Calling \method{getinfo()} for a name not currently
132 contained in the archive will raise a \exception{KeyError}.
Fred Drakee35360f2000-10-03 15:16:31 +0000133\end{methoddesc}
134
Fred Drake42780242000-10-02 20:56:30 +0000135\begin{methoddesc}{infolist}{}
136 Return a list containing a \class{ZipInfo} object for each member of
137 the archive. The objects are in the same order as their entries in
138 the actual ZIP file on disk if an existing archive was opened.
Fred Drake3c9f9362000-03-31 17:51:10 +0000139\end{methoddesc}
140
Fred Drake6fe9bac2000-10-11 18:56:00 +0000141\begin{methoddesc}{namelist}{}
142 Return a list of archive members by name.
143\end{methoddesc}
144
Guido van Rossumd8faa362007-04-27 19:54:29 +0000145\begin{methoddesc}{open}{name\optional{, mode\optional{, pwd}}}
146 Extract a member from the archive as a file-like object (ZipExtFile).
147 \var{name} is the name of the file in the archive. The \var{mode}
148 parameter, if included, must be one of the following: \code{'r'} (the
149 default), \code{'U'}, or \code{'rU'}. Choosing \code{'U'} or
150 \code{'rU'} will enable universal newline support in the read-only
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000151 object. \var{pwd} is the password used for encrypted files. Calling
152 \method{open()} on a closed ZipFile will raise a
153 \exception{RuntimeError}.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000154 \begin{notice}
155 The file-like object is read-only and provides the following methods:
156 \method{read()}, \method{readline()}, \method{readlines()},
157 \method{__iter__()}, \method{next()}.
158 \end{notice}
159 \begin{notice}
160 If the ZipFile was created by passing in a file-like object as the
161 first argument to the constructor, then the object returned by
162 \method{open()} shares the ZipFile's file pointer. Under these
163 circumstances, the object returned by \method{open()} should not
164 be used after any additional operations are performed on the
165 ZipFile object. If the ZipFile was created by passing in a string
166 (the filename) as the first argument to the constructor, then
167 \method{open()} will create a new file object that will be held
168 by the ZipExtFile, allowing it to operate independently of the
169 ZipFile.
170 \end{notice}
171
172 \versionadded{2.6}
173\end{methoddesc}
174
Fred Drake3c9f9362000-03-31 17:51:10 +0000175\begin{methoddesc}{printdir}{}
Fred Drake42780242000-10-02 20:56:30 +0000176 Print a table of contents for the archive to \code{sys.stdout}.
Fred Drake3c9f9362000-03-31 17:51:10 +0000177\end{methoddesc}
178
Thomas Wouterscf297e42007-02-23 15:07:44 +0000179\begin{methoddesc}{setpassword}{pwd}
180 Set \var{pwd} as default password to extract encrypted files.
181 \versionadded{2.6}
182\end{methoddesc}
183
184\begin{methoddesc}{read}{name\optional{, pwd}}
Fred Drake3c9f9362000-03-31 17:51:10 +0000185 Return the bytes of the file in the archive. The archive must be
Thomas Wouterscf297e42007-02-23 15:07:44 +0000186 open for read or append. \var{pwd} is the password used for encrypted
187 files and, if specified, it will override the default password set with
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000188 \method{setpassword()}. Calling \method{read()} on a closed ZipFile
189 will raise a \exception{RuntimeError}.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000190
191 \versionchanged[\var{pwd} was added]{2.6}
Fred Drake3c9f9362000-03-31 17:51:10 +0000192\end{methoddesc}
193
Fred Drake42780242000-10-02 20:56:30 +0000194\begin{methoddesc}{testzip}{}
  Read all the files in the archive and check their CRCs and file
196 headers. Return the name of the first bad file, or else return \code{None}.
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000197 Calling \method{testzip()} on a closed ZipFile will raise a
198 \exception{RuntimeError}.
Fred Drake42780242000-10-02 20:56:30 +0000199\end{methoddesc}
200
Fred Drakee35360f2000-10-03 15:16:31 +0000201\begin{methoddesc}{write}{filename\optional{, arcname\optional{,
202 compress_type}}}
Fred Drake42780242000-10-02 20:56:30 +0000203 Write the file named \var{filename} to the archive, giving it the
Fred Drakee35360f2000-10-03 15:16:31 +0000204 archive name \var{arcname} (by default, this will be the same as
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000205 \var{filename}, but without a drive letter and with leading path
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000206 separators removed). If given, \var{compress_type} overrides the
207 value given for the \var{compression} parameter to the constructor
208 for the new entry. The archive must be open with mode \code{'w'}
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000209 or \code{'a'} -- calling \method{write()} on a ZipFile created with
210 mode \code{'r'} will raise a \exception{RuntimeError}. Calling
211 \method{write()} on a closed ZipFile will raise a
212 \exception{RuntimeError}.
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000213
214 \note{There is no official file name encoding for ZIP files.
  If you have Unicode file names, please convert them to byte strings
216 in your desired encoding before passing them to \method{write()}.
217 WinZip interprets all file names as encoded in CP437, also known
218 as DOS Latin.}
219
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000220 \note{Archive names should be relative to the archive root, that is,
221 they should not start with a path separator.}
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000222
  \note{If \var{arcname} (or \var{filename}, if \var{arcname} is
  not given) contains a null byte, the name of the file in the archive will
  be truncated at the null byte.}
226
Fred Drake3c9f9362000-03-31 17:51:10 +0000227\end{methoddesc}
228
Just van Rossumb083cb32002-12-12 12:23:32 +0000229\begin{methoddesc}{writestr}{zinfo_or_arcname, bytes}
230 Write the string \var{bytes} to the archive; \var{zinfo_or_arcname}
231 is either the file name it will be given in the archive, or a
232 \class{ZipInfo} instance. If it's an instance, at least the
233 filename, date, and time must be given. If it's a name, the date
  and time are set to the current date and time. The archive must be
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000235 opened with mode \code{'w'} or \code{'a'} -- calling
236 \method{writestr()} on a ZipFile created with mode \code{'r'}
237 will raise a \exception{RuntimeError}. Calling \method{writestr()}
238 on a closed ZipFile will raise a \exception{RuntimeError}.
Fred Drake3c9f9362000-03-31 17:51:10 +0000239\end{methoddesc}
240
Fred Drake42780242000-10-02 20:56:30 +0000241
242The following data attribute is also available:
243
244\begin{memberdesc}{debug}
245 The level of debug output to use. This may be set from \code{0}
246 (the default, no output) to \code{3} (the most output). Debugging
247 information is written to \code{sys.stdout}.
248\end{memberdesc}
249
250
251\subsection{PyZipFile Objects \label{pyzipfile-objects}}
252
253The \class{PyZipFile} constructor takes the same parameters as the
254\class{ZipFile} constructor. Instances have one method in addition to
255those of \class{ZipFile} objects.
256
257\begin{methoddesc}[PyZipFile]{writepy}{pathname\optional{, basename}}
Fred Drake3c9f9362000-03-31 17:51:10 +0000258 Search for files \file{*.py} and add the corresponding file to the
259 archive. The corresponding file is a \file{*.pyo} file if
260 available, else a \file{*.pyc} file, compiling if necessary. If the
261 pathname is a file, the filename must end with \file{.py}, and just
Fred Drake42780242000-10-02 20:56:30 +0000262 the (corresponding \file{*.py[co]}) file is added at the top level
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000263 (no path information). If the pathname is a file that does not end with
264 \file{.py}, a \exception{RuntimeError} will be raised. If it is a
265 directory, and the directory is not a package directory, then all the
266 files \file{*.py[co]} are added at the top level. If the directory is
267 a package directory, then all \file{*.py[co]} are added under the package
268 name as a file path, and if any subdirectories are package directories, all
269 of these are added recursively. \var{basename} is intended for
Fred Drake3c9f9362000-03-31 17:51:10 +0000270 internal use only. The \method{writepy()} method makes archives
271 with file names like this:
272
273\begin{verbatim}
274 string.pyc # Top level name
275 test/__init__.pyc # Package directory
276 test/testall.pyc # Module test.testall
277 test/bogus/__init__.pyc # Subpackage directory
278 test/bogus/myfile.pyc # Submodule test.bogus.myfile
279\end{verbatim}
280\end{methoddesc}
281
Fred Drake42780242000-10-02 20:56:30 +0000282
283\subsection{ZipInfo Objects \label{zipinfo-objects}}
284
285Instances of the \class{ZipInfo} class are returned by the
Fred Drakee35360f2000-10-03 15:16:31 +0000286\method{getinfo()} and \method{infolist()} methods of
Fred Drake42780242000-10-02 20:56:30 +0000287\class{ZipFile} objects. Each object stores information about a
288single member of the ZIP archive.
289
290Instances have the following attributes:
291
292\begin{memberdesc}[ZipInfo]{filename}
293 Name of the file in the archive.
294\end{memberdesc}
295
296\begin{memberdesc}[ZipInfo]{date_time}
Raymond Hettinger999b57c2003-08-25 04:28:05 +0000297 The time and date of the last modification to the archive
Fred Drake42780242000-10-02 20:56:30 +0000298 member. This is a tuple of six values:
299
300\begin{tableii}{c|l}{code}{Index}{Value}
301 \lineii{0}{Year}
302 \lineii{1}{Month (one-based)}
303 \lineii{2}{Day of month (one-based)}
304 \lineii{3}{Hours (zero-based)}
305 \lineii{4}{Minutes (zero-based)}
306 \lineii{5}{Seconds (zero-based)}
307\end{tableii}
308\end{memberdesc}
309
310\begin{memberdesc}[ZipInfo]{compress_type}
311 Type of compression for the archive member.
312\end{memberdesc}
313
314\begin{memberdesc}[ZipInfo]{comment}
315 Comment for the individual archive member.
316\end{memberdesc}
317
318\begin{memberdesc}[ZipInfo]{extra}
319 Expansion field data. The
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000320 \citetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
321 {PKZIP Application Note} contains some comments on the internal
322 structure of the data contained in this string.
Fred Drake42780242000-10-02 20:56:30 +0000323\end{memberdesc}
324
325\begin{memberdesc}[ZipInfo]{create_system}
326 System which created ZIP archive.
327\end{memberdesc}
328
329\begin{memberdesc}[ZipInfo]{create_version}
330 PKZIP version which created ZIP archive.
331\end{memberdesc}
332
333\begin{memberdesc}[ZipInfo]{extract_version}
334 PKZIP version needed to extract archive.
335\end{memberdesc}
336
337\begin{memberdesc}[ZipInfo]{reserved}
338 Must be zero.
339\end{memberdesc}
340
341\begin{memberdesc}[ZipInfo]{flag_bits}
342 ZIP flag bits.
343\end{memberdesc}
344
345\begin{memberdesc}[ZipInfo]{volume}
346 Volume number of file header.
347\end{memberdesc}
348
349\begin{memberdesc}[ZipInfo]{internal_attr}
350 Internal attributes.
351\end{memberdesc}
352
353\begin{memberdesc}[ZipInfo]{external_attr}
354 External file attributes.
355\end{memberdesc}
356
357\begin{memberdesc}[ZipInfo]{header_offset}
358 Byte offset to the file header.
359\end{memberdesc}
360
Fred Drake42780242000-10-02 20:56:30 +0000361\begin{memberdesc}[ZipInfo]{CRC}
362 CRC-32 of the uncompressed file.
363\end{memberdesc}
364
365\begin{memberdesc}[ZipInfo]{compress_size}
366 Size of the compressed data.
367\end{memberdesc}
368
369\begin{memberdesc}[ZipInfo]{file_size}
370 Size of the uncompressed file.
371\end{memberdesc}