blob: 4a93323478be08705fe5fb39956ff03dc9797d7f [file] [log] [blame]
Fred Drake3c9f9362000-03-31 17:51:10 +00001\section{\module{zipfile} ---
2 Work with ZIP archives}
3
Fred Drake16753752000-09-18 16:21:11 +00004\declaremodule{standard}{zipfile}
Fred Drake3c9f9362000-03-31 17:51:10 +00005\modulesynopsis{Read and write ZIP-format archive files.}
6\moduleauthor{James C. Ahlstrom}{jim@interet.com}
7\sectionauthor{James C. Ahlstrom}{jim@interet.com}
8% LaTeX markup by Fred L. Drake, Jr. <fdrake@acm.org>
9
Fred Drake6300bd42000-09-07 14:01:40 +000010\versionadded{1.6}
11
Fred Drake3c9f9362000-03-31 17:51:10 +000012The ZIP file format is a common archive and compression standard.
13This module provides tools to create, read, write, append, and list a
Fred Drake42780242000-10-02 20:56:30 +000014ZIP file. Any advanced use of this module will require an
15understanding of the format, as defined in
16\citetitle[http://www.pkware.com/appnote.html]{PKZIP Application
17Note}.
18
19This module does not currently handle ZIP files which have appended
20comments, or multi-disk ZIP files.
Fred Drake3c9f9362000-03-31 17:51:10 +000021
22The available attributes of this module are:
23
24\begin{excdesc}{error}
25 The error raised for bad ZIP files.
26\end{excdesc}
27
28\begin{datadesc}{_debug}
29 Level of printing, defaults to \code{1}.
30\end{datadesc}
31
Fred Drake42780242000-10-02 20:56:30 +000032\begin{classdesc}{ZipFile}{\unspecified}
Fred Drake3c9f9362000-03-31 17:51:10 +000033 The class for reading and writing ZIP files. See
34 ``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for
35 constructor details.
36\end{classdesc}
37
Fred Drake42780242000-10-02 20:56:30 +000038\begin{classdesc}{PyZipFile}{\unspecified}
39 Class for creating ZIP archives containing Python libraries.
40\end{classdesc}
41
42\begin{classdesc}{ZipInfo}{\optional{filename\optional{, date_time}}}
43 Class used to represent information about a member of an archive.
44 Instances of this class are returned by the \method{getinfo()} and
45 \method{infolist()} methods of \class{ZipFile} objects. Most users
46 of the \module{zipfile} module will not need to create these, but
47 only use those created by this module.
48 \var{filename} should be the full name of the archive member, and
49 \var{date_time} should be a tuple containing six fields which
50 describe the time of the last modification to the file; the fields
51 are described in section \ref{zipinfo-objects}, ``ZipInfo Objects.''
52\end{classdesc}
53
Fred Drake3c9f9362000-03-31 17:51:10 +000054\begin{funcdesc}{is_zipfile}{path}
55 Returns true if \var{path} is a valid ZIP file based on its magic
56 number, otherwise returns false. This module does not currently
57 handle ZIP files which have appended comments.
58\end{funcdesc}
59
Fred Drake3c9f9362000-03-31 17:51:10 +000060\begin{datadesc}{ZIP_STORED}
61 The numeric constant (\code{0}) for an uncompressed archive member.
62\end{datadesc}
63
64\begin{datadesc}{ZIP_DEFLATED}
65 The numeric constant for the usual ZIP compression method. This
66 requires the \module{zlib} module. No other compression methods are
67 currently supported.
68\end{datadesc}
69
70
71\begin{seealso}
Fred Drake58295de2000-09-30 00:11:45 +000072 \seetitle[http://www.pkware.com/appnote.html]{PKZIP Application
73 Note}{Documentation on the ZIP file format by Phil
74 Katz, the creator of the format and algorithms used.}
75
76 \seetitle[http://www.info-zip.org/pub/infozip/]{Info-ZIP Home Page}{
77 Information about the Info-ZIP project's ZIP archive
78 programs and development libraries.}
Fred Drake3c9f9362000-03-31 17:51:10 +000079\end{seealso}
80
81
82\subsection{ZipFile Objects \label{zipfile-objects}}
83
84\begin{classdesc}{ZipFile}{filename\optional{, mode\optional{, compression}}}
85 Open a ZIP file named \var{filename}. The \var{mode} parameter
86 should be \code{'r'} to read an existing file, \code{'w'} to
87 truncate and write a new file, or \code{'a'} to append to an
88 existing file. If \var{mode} is \code{'a'} and \var{filename}
89 refers to an existing ZIP file, then additional files are added to
90 it. If \var{filename} does not refer to a ZIP file, then a new ZIP
91 archive is appended to the file. This is meant for adding a ZIP
92 archive to another file, such as \file{python.exe}. Using
Fred Drake42780242000-10-02 20:56:30 +000093
Fred Drake3c9f9362000-03-31 17:51:10 +000094\begin{verbatim}
95cat myzip.zip >> python.exe
96\end{verbatim}
Fred Drake42780242000-10-02 20:56:30 +000097
Fred Drake3c9f9362000-03-31 17:51:10 +000098 also works, and at least \program{WinZip} can read such files.
99 \var{compression} is the ZIP compression method to use when writing
100 the archive, and should be \constant{ZIP_STORED} or
101 \constant{ZIP_DEFLATED}; unrecognized values will cause
102 \exception{ValueError} to be raised. The default is
103 \constant{ZIP_STORED}.
104\end{classdesc}
105
Fred Drake42780242000-10-02 20:56:30 +0000106\begin{methoddesc}{namelist}{}
107 Return a list of archive members by name.
108\end{methoddesc}
Fred Drake3c9f9362000-03-31 17:51:10 +0000109
Fred Drake42780242000-10-02 20:56:30 +0000110\begin{methoddesc}{infolist}{}
111 Return a list containing a \class{ZipInfo} object for each member of
112 the archive. The objects are in the same order as their entries in
113 the actual ZIP file on disk if an existing archive was opened.
Fred Drake3c9f9362000-03-31 17:51:10 +0000114\end{methoddesc}
115
116\begin{methoddesc}{printdir}{}
Fred Drake42780242000-10-02 20:56:30 +0000117 Print a table of contents for the archive to \code{sys.stdout}.
Fred Drake3c9f9362000-03-31 17:51:10 +0000118\end{methoddesc}
119
120\begin{methoddesc}{read}{name}
121 Return the bytes of the file \var{name} in the archive. The archive must be
122 open for read or append.
123\end{methoddesc}
124
Fred Drake42780242000-10-02 20:56:30 +0000125\begin{methoddesc}{testzip}{}
126 Read all the files in the archive and check their CRCs. Return the
127 name of the first bad file, or else return \code{None}.
128\end{methoddesc}
129
130\begin{methoddesc}{writestr}{bytes, arcname, year, month, day,
131 hour, minute, second}
Fred Drake3c9f9362000-03-31 17:51:10 +0000132 Write the string \var{bytes} and the other data to the archive, and
Fred Drake42780242000-10-02 20:56:30 +0000133 give the archive member the name \var{arcname}. The archive must be
134 opened with mode \code{'w'} or \code{'a'}.
135\end{methoddesc}
136
137\begin{methoddesc}{write}{filename, arcname}
138 Write the file named \var{filename} to the archive, giving it the
139 archive name \var{arcname}. The archive must be open with mode
Fred Drake3c9f9362000-03-31 17:51:10 +0000140 \code{'w'} or \code{'a'}.
141\end{methoddesc}
142
Fred Drake42780242000-10-02 20:56:30 +0000143\begin{methoddesc}{close}{}
144 Close the archive file. You must call \method{close()} before
145 exiting your program or essential records will not be written.
Fred Drake3c9f9362000-03-31 17:51:10 +0000146\end{methoddesc}
147
Fred Drake42780242000-10-02 20:56:30 +0000148
149The following data attribute is also available:
150
151\begin{memberdesc}{debug}
152 The level of debug output to use. This may be set from \code{0}
153 (the default, no output) to \code{3} (the most output). Debugging
154 information is written to \code{sys.stdout}.
155\end{memberdesc}
156
157
158\subsection{PyZipFile Objects \label{pyzipfile-objects}}
159
160The \class{PyZipFile} constructor takes the same parameters as the
161\class{ZipFile} constructor. Instances have one method in addition to
162those of \class{ZipFile} objects.
163
164\begin{methoddesc}[PyZipFile]{writepy}{pathname\optional{, basename}}
Fred Drake3c9f9362000-03-31 17:51:10 +0000165 Search for files \file{*.py} and add the corresponding file to the
166 archive. The corresponding file is a \file{*.pyo} file if
167 available, else a \file{*.pyc} file, compiling if necessary. If the
168 pathname is a file, the filename must end with \file{.py}, and just
Fred Drake42780242000-10-02 20:56:30 +0000169 the (corresponding \file{*.py[co]}) file is added at the top level
Fred Drake3c9f9362000-03-31 17:51:10 +0000170 (no path information). If it is a directory, and the directory is
Fred Drake42780242000-10-02 20:56:30 +0000171 not a package directory, then all the files \file{*.py[co]} are
Fred Drake3c9f9362000-03-31 17:51:10 +0000172 added at the top level. If the directory is a package directory,
173 then all \file{*.py[oc]} are added under the package name as a file
174 path, and if any subdirectories are package directories, all of
175 these are added recursively. \var{basename} is intended for
176 internal use only. The \method{writepy()} method makes archives
177 with file names like this:
178
179\begin{verbatim}
180 string.pyc # Top level name
181 test/__init__.pyc # Package directory
182 test/testall.pyc # Module test.testall
183 test/bogus/__init__.pyc # Subpackage directory
184 test/bogus/myfile.pyc # Submodule test.bogus.myfile
185\end{verbatim}
186\end{methoddesc}
187
Fred Drake42780242000-10-02 20:56:30 +0000188
189\subsection{ZipInfo Objects \label{zipinfo-objects}}
190
191Instances of the \class{ZipInfo} class are returned by the
192\method{getinfo()} and \method{infolist()} methods of
193\class{ZipFile} objects. Each object stores information about a
194single member of the ZIP archive.
195
196Instances have the following attributes:
197
198\begin{memberdesc}[ZipInfo]{filename}
199 Name of the file in the archive.
200\end{memberdesc}
201
202\begin{memberdesc}[ZipInfo]{date_time}
203 The time and date of the last modification to the archive
204 member. This is a tuple of six values:
205
206\begin{tableii}{c|l}{code}{Index}{Value}
207 \lineii{0}{Year}
208 \lineii{1}{Month (one-based)}
209 \lineii{2}{Day of month (one-based)}
210 \lineii{3}{Hours (zero-based)}
211 \lineii{4}{Minutes (zero-based)}
212 \lineii{5}{Seconds (zero-based)}
213\end{tableii}
214\end{memberdesc}
215
216\begin{memberdesc}[ZipInfo]{compress_type}
217 Type of compression for the archive member.
218\end{memberdesc}
219
220\begin{memberdesc}[ZipInfo]{comment}
221 Comment for the individual archive member.
222\end{memberdesc}
223
224\begin{memberdesc}[ZipInfo]{extra}
225 Expansion field data. The
226 \citetitle[http://www.pkware.com/appnote.html]{PKZIP Application
227 Note} contains some comments on the internal structure of the data
228 contained in this string.
229\end{memberdesc}
230
231\begin{memberdesc}[ZipInfo]{create_system}
232 System which created ZIP archive.
233\end{memberdesc}
234
235\begin{memberdesc}[ZipInfo]{create_version}
236 PKZIP version which created ZIP archive.
237\end{memberdesc}
238
239\begin{memberdesc}[ZipInfo]{extract_version}
240 PKZIP version needed to extract archive.
241\end{memberdesc}
242
243\begin{memberdesc}[ZipInfo]{reserved}
244 Must be zero.
245\end{memberdesc}
246
247\begin{memberdesc}[ZipInfo]{flag_bits}
248 ZIP flag bits.
249\end{memberdesc}
250
251\begin{memberdesc}[ZipInfo]{volume}
252 Volume number of file header.
253\end{memberdesc}
254
255\begin{memberdesc}[ZipInfo]{internal_attr}
256 Internal attributes.
257\end{memberdesc}
258
259\begin{memberdesc}[ZipInfo]{external_attr}
260 External file attributes.
261\end{memberdesc}
262
263\begin{memberdesc}[ZipInfo]{header_offset}
264 Byte offset to the file header.
265\end{memberdesc}
266
267\begin{memberdesc}[ZipInfo]{file_offset}
268 Byte offset to the start of the file data.
269\end{memberdesc}
270
271\begin{memberdesc}[ZipInfo]{CRC}
272 CRC-32 of the uncompressed file.
273\end{memberdesc}
274
275\begin{memberdesc}[ZipInfo]{compress_size}
276 Size of the compressed data.
277\end{memberdesc}
278
279\begin{memberdesc}[ZipInfo]{file_size}
280 Size of the uncompressed file.
281\end{memberdesc}