blob: 47d1e5a108f476fd8e22f013b62d31b3fc321484 [file] [log] [blame]
Fred Drake3c9f9362000-03-31 17:51:10 +00001\section{\module{zipfile} ---
2 Work with ZIP archives}
3
Fred Drake16753752000-09-18 16:21:11 +00004\declaremodule{standard}{zipfile}
Fred Drake3c9f9362000-03-31 17:51:10 +00005\modulesynopsis{Read and write ZIP-format archive files.}
6\moduleauthor{James C. Ahlstrom}{jim@interet.com}
7\sectionauthor{James C. Ahlstrom}{jim@interet.com}
8% LaTeX markup by Fred L. Drake, Jr. <fdrake@acm.org>
9
Fred Drake6300bd42000-09-07 14:01:40 +000010\versionadded{1.6}
11
Fred Drake3c9f9362000-03-31 17:51:10 +000012The ZIP file format is a common archive and compression standard.
13This module provides tools to create, read, write, append, and list a
Fred Drake42780242000-10-02 20:56:30 +000014ZIP file. Any advanced use of this module will require an
15understanding of the format, as defined in
George Yoshida3f1f7be2006-06-17 16:39:13 +000016\citetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
17{PKZIP Application Note}.
Fred Drake42780242000-10-02 20:56:30 +000018
This module does not currently handle ZIP files which have appended
comments, or multi-disk ZIP files. It can handle ZIP files that use the
ZIP64 extensions (that is, ZIP files that are more than 4 GByte in size).
Fred Drake3c9f9362000-03-31 17:51:10 +000022
23The available attributes of this module are:
24
25\begin{excdesc}{error}
26 The error raised for bad ZIP files.
27\end{excdesc}
28
Ronald Oussoren143cefb2006-06-15 08:14:18 +000029\begin{excdesc}{LargeZipFile}
  The error raised when a ZIP file would require ZIP64 functionality but
  that functionality has not been enabled.
32\end{excdesc}
33
Fred Drake96d7a702001-05-11 01:08:13 +000034\begin{classdesc*}{ZipFile}
Fred Drake3c9f9362000-03-31 17:51:10 +000035 The class for reading and writing ZIP files. See
36 ``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for
37 constructor details.
Fred Drake886f1132001-05-11 15:49:19 +000038\end{classdesc*}
Fred Drake3c9f9362000-03-31 17:51:10 +000039
Fred Drake96d7a702001-05-11 01:08:13 +000040\begin{classdesc*}{PyZipFile}
Fred Drake42780242000-10-02 20:56:30 +000041 Class for creating ZIP archives containing Python libraries.
Fred Drake886f1132001-05-11 15:49:19 +000042\end{classdesc*}
Fred Drake42780242000-10-02 20:56:30 +000043
44\begin{classdesc}{ZipInfo}{\optional{filename\optional{, date_time}}}
Raymond Hettinger68804312005-01-01 00:28:46 +000045 Class used to represent information about a member of an archive.
Fred Drake42780242000-10-02 20:56:30 +000046 Instances of this class are returned by the \method{getinfo()} and
Fred Drakee35360f2000-10-03 15:16:31 +000047 \method{infolist()} methods of \class{ZipFile} objects. Most users
Fred Drake42780242000-10-02 20:56:30 +000048 of the \module{zipfile} module will not need to create these, but
49 only use those created by this module.
50 \var{filename} should be the full name of the archive member, and
51 \var{date_time} should be a tuple containing six fields which
52 describe the time of the last modification to the file; the fields
53 are described in section \ref{zipinfo-objects}, ``ZipInfo Objects.''
54\end{classdesc}
55
Fred Drake5d63a392000-10-06 15:29:56 +000056\begin{funcdesc}{is_zipfile}{filename}
Neal Norwitz6b353702002-04-09 18:15:00 +000057 Returns \code{True} if \var{filename} is a valid ZIP file based on its magic
58 number, otherwise returns \code{False}. This module does not currently
Fred Drake3c9f9362000-03-31 17:51:10 +000059 handle ZIP files which have appended comments.
60\end{funcdesc}
61
Fred Drake3c9f9362000-03-31 17:51:10 +000062\begin{datadesc}{ZIP_STORED}
Fred Drake5d63a392000-10-06 15:29:56 +000063 The numeric constant for an uncompressed archive member.
Fred Drake3c9f9362000-03-31 17:51:10 +000064\end{datadesc}
65
66\begin{datadesc}{ZIP_DEFLATED}
  The numeric constant for the usual ZIP compression method. This
  requires the \refmodule{zlib} module. No other compression methods are
  currently supported.
70\end{datadesc}
71
72
73\begin{seealso}
George Yoshida3f1f7be2006-06-17 16:39:13 +000074 \seetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
75 {PKZIP Application Note}{Documentation on the ZIP file format by
76 Phil Katz, the creator of the format and algorithms used.}
Fred Drake58295de2000-09-30 00:11:45 +000077
78 \seetitle[http://www.info-zip.org/pub/infozip/]{Info-ZIP Home Page}{
79 Information about the Info-ZIP project's ZIP archive
80 programs and development libraries.}
Fred Drake3c9f9362000-03-31 17:51:10 +000081\end{seealso}
82
83
84\subsection{ZipFile Objects \label{zipfile-objects}}
85
Ronald Oussoren143cefb2006-06-15 08:14:18 +000086\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression\optional{, allowZip64}}}}
Fred Drakebda3a592001-05-09 19:57:37 +000087 Open a ZIP file, where \var{file} can be either a path to a file
Fred Drake907e76b2001-07-06 20:30:11 +000088 (a string) or a file-like object. The \var{mode} parameter
Fred Drake3c9f9362000-03-31 17:51:10 +000089 should be \code{'r'} to read an existing file, \code{'w'} to
90 truncate and write a new file, or \code{'a'} to append to an
  existing file. If \var{mode} is \code{'a'} and \var{file}
Fred Drake3c9f9362000-03-31 17:51:10 +000092 refers to an existing ZIP file, then additional files are added to
Fred Drakebda3a592001-05-09 19:57:37 +000093 it. If \var{file} does not refer to a ZIP file, then a new ZIP
Fred Drake3c9f9362000-03-31 17:51:10 +000094 archive is appended to the file. This is meant for adding a ZIP
95 archive to another file, such as \file{python.exe}. Using
Fred Drake42780242000-10-02 20:56:30 +000096
Fred Drake3c9f9362000-03-31 17:51:10 +000097\begin{verbatim}
98cat myzip.zip >> python.exe
99\end{verbatim}
Fred Drake42780242000-10-02 20:56:30 +0000100
Fred Drake3c9f9362000-03-31 17:51:10 +0000101 also works, and at least \program{WinZip} can read such files.
102 \var{compression} is the ZIP compression method to use when writing
103 the archive, and should be \constant{ZIP_STORED} or
104 \constant{ZIP_DEFLATED}; unrecognized values will cause
Fred Drakee35360f2000-10-03 15:16:31 +0000105 \exception{RuntimeError} to be raised. If \constant{ZIP_DEFLATED}
Thomas Heller3d62f8c2002-01-14 08:37:39 +0000106 is specified but the \refmodule{zlib} module is not available,
Fred Drakee35360f2000-10-03 15:16:31 +0000107 \exception{RuntimeError} is also raised. The default is
Fred Drake3c9f9362000-03-31 17:51:10 +0000108 \constant{ZIP_STORED}.
  If \var{allowZip64} is \code{True}, \module{zipfile} will create ZIP files
  that use the ZIP64 extensions when the ZIP file is larger than 2 GBytes.
  If it is false (the default), \module{zipfile} will raise
  \exception{LargeZipFile} when the ZIP file would require ZIP64 extensions.
  ZIP64 extensions are disabled by default because the default zip and unzip
  commands on \UNIX{} (the InfoZIP utilities) don't support these extensions.
Fred Drake3c9f9362000-03-31 17:51:10 +0000115\end{classdesc}
116
Fred Drakee35360f2000-10-03 15:16:31 +0000117\begin{methoddesc}{close}{}
118 Close the archive file. You must call \method{close()} before
119 exiting your program or essential records will not be written.
120\end{methoddesc}
121
122\begin{methoddesc}{getinfo}{name}
123 Return a \class{ZipInfo} object with information about the archive
124 member \var{name}.
125\end{methoddesc}
126
Fred Drake42780242000-10-02 20:56:30 +0000127\begin{methoddesc}{infolist}{}
128 Return a list containing a \class{ZipInfo} object for each member of
129 the archive. The objects are in the same order as their entries in
130 the actual ZIP file on disk if an existing archive was opened.
Fred Drake3c9f9362000-03-31 17:51:10 +0000131\end{methoddesc}
132
Fred Drake6fe9bac2000-10-11 18:56:00 +0000133\begin{methoddesc}{namelist}{}
134 Return a list of archive members by name.
135\end{methoddesc}
136
Fred Drake3c9f9362000-03-31 17:51:10 +0000137\begin{methoddesc}{printdir}{}
Fred Drake42780242000-10-02 20:56:30 +0000138 Print a table of contents for the archive to \code{sys.stdout}.
Fred Drake3c9f9362000-03-31 17:51:10 +0000139\end{methoddesc}
140
141\begin{methoddesc}{read}{name}
  Return the bytes of the file \var{name} in the archive. The archive
  must be open for read or append.
144\end{methoddesc}
145
Fred Drake42780242000-10-02 20:56:30 +0000146\begin{methoddesc}{testzip}{}
  Read all the files in the archive and check their CRCs and file
  headers. Return the name of the first bad file, or else return \code{None}.
Fred Drake42780242000-10-02 20:56:30 +0000149\end{methoddesc}
150
Fred Drakee35360f2000-10-03 15:16:31 +0000151\begin{methoddesc}{write}{filename\optional{, arcname\optional{,
152 compress_type}}}
Fred Drake42780242000-10-02 20:56:30 +0000153 Write the file named \var{filename} to the archive, giving it the
Fred Drakee35360f2000-10-03 15:16:31 +0000154 archive name \var{arcname} (by default, this will be the same as
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000155 \var{filename}, but without a drive letter and with leading path
Georg Brandlcaf95392006-04-06 10:03:32 +0000156 separators removed). If given, \var{compress_type} overrides the
157 value given for the \var{compression} parameter to the constructor
158 for the new entry. The archive must be open with mode \code{'w'}
159 or \code{'a'}.
160
161 \note{There is no official file name encoding for ZIP files.
  If you have Unicode file names, please convert them to byte strings
163 in your desired encoding before passing them to \method{write()}.
164 WinZip interprets all file names as encoded in CP437, also known
165 as DOS Latin.}
166
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000167 \note{Archive names should be relative to the archive root, that is,
168 they should not start with a path separator.}
Fred Drake3c9f9362000-03-31 17:51:10 +0000169\end{methoddesc}
170
Just van Rossumb083cb32002-12-12 12:23:32 +0000171\begin{methoddesc}{writestr}{zinfo_or_arcname, bytes}
172 Write the string \var{bytes} to the archive; \var{zinfo_or_arcname}
173 is either the file name it will be given in the archive, or a
174 \class{ZipInfo} instance. If it's an instance, at least the
  filename, date, and time must be given. If it's a name, the date
  and time are set to the current date and time. The archive must be
177 opened with mode \code{'w'} or \code{'a'}.
Fred Drake3c9f9362000-03-31 17:51:10 +0000178\end{methoddesc}
179
Fred Drake42780242000-10-02 20:56:30 +0000180
181The following data attribute is also available:
182
183\begin{memberdesc}{debug}
184 The level of debug output to use. This may be set from \code{0}
185 (the default, no output) to \code{3} (the most output). Debugging
186 information is written to \code{sys.stdout}.
187\end{memberdesc}
188
189
190\subsection{PyZipFile Objects \label{pyzipfile-objects}}
191
192The \class{PyZipFile} constructor takes the same parameters as the
193\class{ZipFile} constructor. Instances have one method in addition to
194those of \class{ZipFile} objects.
195
196\begin{methoddesc}[PyZipFile]{writepy}{pathname\optional{, basename}}
Fred Drake3c9f9362000-03-31 17:51:10 +0000197 Search for files \file{*.py} and add the corresponding file to the
198 archive. The corresponding file is a \file{*.pyo} file if
199 available, else a \file{*.pyc} file, compiling if necessary. If the
200 pathname is a file, the filename must end with \file{.py}, and just
Fred Drake42780242000-10-02 20:56:30 +0000201 the (corresponding \file{*.py[co]}) file is added at the top level
Fred Drake3c9f9362000-03-31 17:51:10 +0000202 (no path information). If it is a directory, and the directory is
Fred Drake42780242000-10-02 20:56:30 +0000203 not a package directory, then all the files \file{*.py[co]} are
Fred Drake3c9f9362000-03-31 17:51:10 +0000204 added at the top level. If the directory is a package directory,
205 then all \file{*.py[oc]} are added under the package name as a file
206 path, and if any subdirectories are package directories, all of
207 these are added recursively. \var{basename} is intended for
208 internal use only. The \method{writepy()} method makes archives
209 with file names like this:
210
211\begin{verbatim}
212 string.pyc # Top level name
213 test/__init__.pyc # Package directory
214 test/testall.pyc # Module test.testall
215 test/bogus/__init__.pyc # Subpackage directory
216 test/bogus/myfile.pyc # Submodule test.bogus.myfile
217\end{verbatim}
218\end{methoddesc}
219
Fred Drake42780242000-10-02 20:56:30 +0000220
221\subsection{ZipInfo Objects \label{zipinfo-objects}}
222
223Instances of the \class{ZipInfo} class are returned by the
Fred Drakee35360f2000-10-03 15:16:31 +0000224\method{getinfo()} and \method{infolist()} methods of
Fred Drake42780242000-10-02 20:56:30 +0000225\class{ZipFile} objects. Each object stores information about a
226single member of the ZIP archive.
227
228Instances have the following attributes:
229
230\begin{memberdesc}[ZipInfo]{filename}
231 Name of the file in the archive.
232\end{memberdesc}
233
234\begin{memberdesc}[ZipInfo]{date_time}
Raymond Hettinger999b57c2003-08-25 04:28:05 +0000235 The time and date of the last modification to the archive
Fred Drake42780242000-10-02 20:56:30 +0000236 member. This is a tuple of six values:
237
238\begin{tableii}{c|l}{code}{Index}{Value}
239 \lineii{0}{Year}
240 \lineii{1}{Month (one-based)}
241 \lineii{2}{Day of month (one-based)}
242 \lineii{3}{Hours (zero-based)}
243 \lineii{4}{Minutes (zero-based)}
244 \lineii{5}{Seconds (zero-based)}
245\end{tableii}
246\end{memberdesc}
247
248\begin{memberdesc}[ZipInfo]{compress_type}
249 Type of compression for the archive member.
250\end{memberdesc}
251
252\begin{memberdesc}[ZipInfo]{comment}
253 Comment for the individual archive member.
254\end{memberdesc}
255
256\begin{memberdesc}[ZipInfo]{extra}
257 Expansion field data. The
George Yoshida3f1f7be2006-06-17 16:39:13 +0000258 \citetitle[http://www.pkware.com/business_and_developers/developer/appnote/]
259 {PKZIP Application Note} contains some comments on the internal
260 structure of the data contained in this string.
Fred Drake42780242000-10-02 20:56:30 +0000261\end{memberdesc}
262
263\begin{memberdesc}[ZipInfo]{create_system}
264 System which created ZIP archive.
265\end{memberdesc}
266
267\begin{memberdesc}[ZipInfo]{create_version}
268 PKZIP version which created ZIP archive.
269\end{memberdesc}
270
271\begin{memberdesc}[ZipInfo]{extract_version}
272 PKZIP version needed to extract archive.
273\end{memberdesc}
274
275\begin{memberdesc}[ZipInfo]{reserved}
276 Must be zero.
277\end{memberdesc}
278
279\begin{memberdesc}[ZipInfo]{flag_bits}
280 ZIP flag bits.
281\end{memberdesc}
282
283\begin{memberdesc}[ZipInfo]{volume}
284 Volume number of file header.
285\end{memberdesc}
286
287\begin{memberdesc}[ZipInfo]{internal_attr}
288 Internal attributes.
289\end{memberdesc}
290
291\begin{memberdesc}[ZipInfo]{external_attr}
292 External file attributes.
293\end{memberdesc}
294
295\begin{memberdesc}[ZipInfo]{header_offset}
296 Byte offset to the file header.
297\end{memberdesc}
298
Fred Drake42780242000-10-02 20:56:30 +0000299\begin{memberdesc}[ZipInfo]{CRC}
300 CRC-32 of the uncompressed file.
301\end{memberdesc}
302
303\begin{memberdesc}[ZipInfo]{compress_size}
304 Size of the compressed data.
305\end{memberdesc}
306
307\begin{memberdesc}[ZipInfo]{file_size}
308 Size of the uncompressed file.
309\end{memberdesc}