% blob: 4e06ef692ece7991273e9296e467103965496a72
\section{\module{zipfile} ---
         Work with ZIP archives}
3
Fred Drake16753752000-09-18 16:21:11 +00004\declaremodule{standard}{zipfile}
Fred Drake3c9f9362000-03-31 17:51:10 +00005\modulesynopsis{Read and write ZIP-format archive files.}
6\moduleauthor{James C. Ahlstrom}{jim@interet.com}
7\sectionauthor{James C. Ahlstrom}{jim@interet.com}
8% LaTeX markup by Fred L. Drake, Jr. <fdrake@acm.org>
9
Fred Drake6300bd42000-09-07 14:01:40 +000010\versionadded{1.6}
11
Fred Drake3c9f9362000-03-31 17:51:10 +000012The ZIP file format is a common archive and compression standard.
13This module provides tools to create, read, write, append, and list a
Fred Drake42780242000-10-02 20:56:30 +000014ZIP file. Any advanced use of this module will require an
15understanding of the format, as defined in
16\citetitle[http://www.pkware.com/appnote.html]{PKZIP Application
17Note}.
18
19This module does not currently handle ZIP files which have appended
20comments, or multi-disk ZIP files.
Fred Drake3c9f9362000-03-31 17:51:10 +000021
22The available attributes of this module are:
23
24\begin{excdesc}{error}
25 The error raised for bad ZIP files.
26\end{excdesc}
27
\begin{classdesc*}{ZipFile}
  The class for reading and writing ZIP files.  See
  ``\citetitle{ZipFile Objects}'' (section~\ref{zipfile-objects}) for
  constructor details.
\end{classdesc*}
Fred Drake3c9f9362000-03-31 17:51:10 +000033
Fred Drake96d7a702001-05-11 01:08:13 +000034\begin{classdesc*}{PyZipFile}
Fred Drake42780242000-10-02 20:56:30 +000035 Class for creating ZIP archives containing Python libraries.
Fred Drake886f1132001-05-11 15:49:19 +000036\end{classdesc*}
Fred Drake42780242000-10-02 20:56:30 +000037
\begin{classdesc}{ZipInfo}{\optional{filename\optional{, date_time}}}
  Class used to represent information about a member of an archive.
  Instances of this class are returned by the \method{getinfo()} and
  \method{infolist()} methods of \class{ZipFile} objects.  Most users
  of the \module{zipfile} module will not need to create these, but
  only use those created by this module.
  \var{filename} should be the full name of the archive member, and
  \var{date_time} should be a tuple containing six fields which
  describe the time of the last modification to the file; the fields
  are described in section~\ref{zipinfo-objects}, ``ZipInfo Objects.''
\end{classdesc}
49
Fred Drake5d63a392000-10-06 15:29:56 +000050\begin{funcdesc}{is_zipfile}{filename}
Neal Norwitz6b353702002-04-09 18:15:00 +000051 Returns \code{True} if \var{filename} is a valid ZIP file based on its magic
52 number, otherwise returns \code{False}. This module does not currently
Fred Drake3c9f9362000-03-31 17:51:10 +000053 handle ZIP files which have appended comments.
54\end{funcdesc}
55
Fred Drake3c9f9362000-03-31 17:51:10 +000056\begin{datadesc}{ZIP_STORED}
Fred Drake5d63a392000-10-06 15:29:56 +000057 The numeric constant for an uncompressed archive member.
Fred Drake3c9f9362000-03-31 17:51:10 +000058\end{datadesc}
59
\begin{datadesc}{ZIP_DEFLATED}
  The numeric constant for the usual ZIP compression method.  This
  requires the \refmodule{zlib} module.  No other compression methods are
  currently supported.
\end{datadesc}
65
66
67\begin{seealso}
Fred Drake58295de2000-09-30 00:11:45 +000068 \seetitle[http://www.pkware.com/appnote.html]{PKZIP Application
69 Note}{Documentation on the ZIP file format by Phil
70 Katz, the creator of the format and algorithms used.}
71
72 \seetitle[http://www.info-zip.org/pub/infozip/]{Info-ZIP Home Page}{
73 Information about the Info-ZIP project's ZIP archive
74 programs and development libraries.}
Fred Drake3c9f9362000-03-31 17:51:10 +000075\end{seealso}
76
77
78\subsection{ZipFile Objects \label{zipfile-objects}}
79
\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression}}}
  Open a ZIP file, where \var{file} can be either a path to a file
  (a string) or a file-like object.  The \var{mode} parameter
  should be \code{'r'} to read an existing file, \code{'w'} to
  truncate and write a new file, or \code{'a'} to append to an
  existing file.  If \var{mode} is \code{'a'} and \var{file}
  refers to an existing ZIP file, then additional files are added to
  it.  If \var{file} does not refer to a ZIP file, then a new ZIP
  archive is appended to the file.  This is meant for adding a ZIP
  archive to another file, such as \file{python.exe}.  Using

\begin{verbatim}
cat myzip.zip >> python.exe
\end{verbatim}

  also works, and at least \program{WinZip} can read such files.
  \var{compression} is the ZIP compression method to use when writing
  the archive, and should be \constant{ZIP_STORED} or
  \constant{ZIP_DEFLATED}; unrecognized values will cause
  \exception{RuntimeError} to be raised.  If \constant{ZIP_DEFLATED}
  is specified but the \refmodule{zlib} module is not available,
  \exception{RuntimeError} is also raised.  The default is
  \constant{ZIP_STORED}.
\end{classdesc}
104
Fred Drakee35360f2000-10-03 15:16:31 +0000105\begin{methoddesc}{close}{}
106 Close the archive file. You must call \method{close()} before
107 exiting your program or essential records will not be written.
108\end{methoddesc}
109
110\begin{methoddesc}{getinfo}{name}
111 Return a \class{ZipInfo} object with information about the archive
112 member \var{name}.
113\end{methoddesc}
114
Fred Drake42780242000-10-02 20:56:30 +0000115\begin{methoddesc}{infolist}{}
116 Return a list containing a \class{ZipInfo} object for each member of
117 the archive. The objects are in the same order as their entries in
118 the actual ZIP file on disk if an existing archive was opened.
Fred Drake3c9f9362000-03-31 17:51:10 +0000119\end{methoddesc}
120
Fred Drake6fe9bac2000-10-11 18:56:00 +0000121\begin{methoddesc}{namelist}{}
122 Return a list of archive members by name.
123\end{methoddesc}
124
Fred Drake3c9f9362000-03-31 17:51:10 +0000125\begin{methoddesc}{printdir}{}
Fred Drake42780242000-10-02 20:56:30 +0000126 Print a table of contents for the archive to \code{sys.stdout}.
Fred Drake3c9f9362000-03-31 17:51:10 +0000127\end{methoddesc}
128
\begin{methoddesc}{read}{name}
  Return the bytes of the file \var{name} in the archive.  The archive
  must be open for read or append.
\end{methoddesc}
133
\begin{methoddesc}{testzip}{}
  Read all the files in the archive and check their CRCs.  Return the
  name of the first bad file, or else return \code{None}.
\end{methoddesc}
138
Fred Drakee35360f2000-10-03 15:16:31 +0000139\begin{methoddesc}{write}{filename\optional{, arcname\optional{,
140 compress_type}}}
Fred Drake42780242000-10-02 20:56:30 +0000141 Write the file named \var{filename} to the archive, giving it the
Fred Drakee35360f2000-10-03 15:16:31 +0000142 archive name \var{arcname} (by default, this will be the same as
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000143 \var{filename}, but without a drive letter and with leading path
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000144 separators removed). If given, \var{compress_type} overrides the
145 value given for the \var{compression} parameter to the constructor
146 for the new entry. The archive must be open with mode \code{'w'}
147 or \code{'a'}.
148
  \note{There is no official file name encoding for ZIP files.
  If you have Unicode file names, please convert them to byte strings
  in your desired encoding before passing them to \method{write()}.
  \program{WinZip} interprets all file names as encoded in CP437, also
  known as DOS Latin.}
154
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000155 \note{Archive names should be relative to the archive root, that is,
156 they should not start with a path separator.}
Fred Drake3c9f9362000-03-31 17:51:10 +0000157\end{methoddesc}
158
\begin{methoddesc}{writestr}{zinfo_or_arcname, bytes}
  Write the string \var{bytes} to the archive; \var{zinfo_or_arcname}
  is either the file name it will be given in the archive, or a
  \class{ZipInfo} instance.  If it's an instance, at least the
  filename, date, and time must be given.  If it's a name, the date
  and time are set to the current date and time.  The archive must be
  opened with mode \code{'w'} or \code{'a'}.
\end{methoddesc}
167
Fred Drake42780242000-10-02 20:56:30 +0000168
169The following data attribute is also available:
170
171\begin{memberdesc}{debug}
172 The level of debug output to use. This may be set from \code{0}
173 (the default, no output) to \code{3} (the most output). Debugging
174 information is written to \code{sys.stdout}.
175\end{memberdesc}
176
177
178\subsection{PyZipFile Objects \label{pyzipfile-objects}}
179
180The \class{PyZipFile} constructor takes the same parameters as the
181\class{ZipFile} constructor. Instances have one method in addition to
182those of \class{ZipFile} objects.
183
\begin{methoddesc}[PyZipFile]{writepy}{pathname\optional{, basename}}
  Search for files \file{*.py} and add the corresponding file to the
  archive.  The corresponding file is a \file{*.pyo} file if
  available, else a \file{*.pyc} file, compiling if necessary.  If
  \var{pathname} is a file, the filename must end with \file{.py}, and
  just the (corresponding \file{*.py[co]}) file is added at the top level
  (no path information).  If \var{pathname} is a directory, and the
  directory is not a package directory, then all the files
  \file{*.py[co]} are added at the top level.  If the directory is a
  package directory, then all \file{*.py[co]} are added under the
  package name as a file path, and if any subdirectories are package
  directories, all of these are added recursively.  \var{basename} is
  intended for internal use only.  The \method{writepy()} method makes
  archives with file names like this:
198
199\begin{verbatim}
200 string.pyc # Top level name
201 test/__init__.pyc # Package directory
202 test/testall.pyc # Module test.testall
203 test/bogus/__init__.pyc # Subpackage directory
204 test/bogus/myfile.pyc # Submodule test.bogus.myfile
205\end{verbatim}
206\end{methoddesc}
207
Fred Drake42780242000-10-02 20:56:30 +0000208
209\subsection{ZipInfo Objects \label{zipinfo-objects}}
210
211Instances of the \class{ZipInfo} class are returned by the
Fred Drakee35360f2000-10-03 15:16:31 +0000212\method{getinfo()} and \method{infolist()} methods of
Fred Drake42780242000-10-02 20:56:30 +0000213\class{ZipFile} objects. Each object stores information about a
214single member of the ZIP archive.
215
216Instances have the following attributes:
217
218\begin{memberdesc}[ZipInfo]{filename}
219 Name of the file in the archive.
220\end{memberdesc}
221
222\begin{memberdesc}[ZipInfo]{date_time}
Raymond Hettinger999b57c2003-08-25 04:28:05 +0000223 The time and date of the last modification to the archive
Fred Drake42780242000-10-02 20:56:30 +0000224 member. This is a tuple of six values:
225
226\begin{tableii}{c|l}{code}{Index}{Value}
227 \lineii{0}{Year}
228 \lineii{1}{Month (one-based)}
229 \lineii{2}{Day of month (one-based)}
230 \lineii{3}{Hours (zero-based)}
231 \lineii{4}{Minutes (zero-based)}
232 \lineii{5}{Seconds (zero-based)}
233\end{tableii}
234\end{memberdesc}
235
236\begin{memberdesc}[ZipInfo]{compress_type}
237 Type of compression for the archive member.
238\end{memberdesc}
239
240\begin{memberdesc}[ZipInfo]{comment}
241 Comment for the individual archive member.
242\end{memberdesc}
243
244\begin{memberdesc}[ZipInfo]{extra}
245 Expansion field data. The
246 \citetitle[http://www.pkware.com/appnote.html]{PKZIP Application
247 Note} contains some comments on the internal structure of the data
248 contained in this string.
249\end{memberdesc}
250
251\begin{memberdesc}[ZipInfo]{create_system}
252 System which created ZIP archive.
253\end{memberdesc}
254
255\begin{memberdesc}[ZipInfo]{create_version}
256 PKZIP version which created ZIP archive.
257\end{memberdesc}
258
259\begin{memberdesc}[ZipInfo]{extract_version}
260 PKZIP version needed to extract archive.
261\end{memberdesc}
262
263\begin{memberdesc}[ZipInfo]{reserved}
264 Must be zero.
265\end{memberdesc}
266
267\begin{memberdesc}[ZipInfo]{flag_bits}
268 ZIP flag bits.
269\end{memberdesc}
270
271\begin{memberdesc}[ZipInfo]{volume}
272 Volume number of file header.
273\end{memberdesc}
274
275\begin{memberdesc}[ZipInfo]{internal_attr}
276 Internal attributes.
277\end{memberdesc}
278
279\begin{memberdesc}[ZipInfo]{external_attr}
280 External file attributes.
281\end{memberdesc}
282
283\begin{memberdesc}[ZipInfo]{header_offset}
284 Byte offset to the file header.
285\end{memberdesc}
286
287\begin{memberdesc}[ZipInfo]{file_offset}
288 Byte offset to the start of the file data.
289\end{memberdesc}
290
291\begin{memberdesc}[ZipInfo]{CRC}
292 CRC-32 of the uncompressed file.
293\end{memberdesc}
294
295\begin{memberdesc}[ZipInfo]{compress_size}
296 Size of the compressed data.
297\end{memberdesc}
298
299\begin{memberdesc}[ZipInfo]{file_size}
300 Size of the uncompressed file.
301\end{memberdesc}