Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 1 | \section{\module{zipfile} --- |
| 2 | Work with ZIP archives} |
| 3 | |
Fred Drake | 1675375 | 2000-09-18 16:21:11 +0000 | [diff] [blame] | 4 | \declaremodule{standard}{zipfile} |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 5 | \modulesynopsis{Read and write ZIP-format archive files.} |
| 6 | \moduleauthor{James C. Ahlstrom}{jim@interet.com} |
| 7 | \sectionauthor{James C. Ahlstrom}{jim@interet.com} |
| 8 | % LaTeX markup by Fred L. Drake, Jr. <fdrake@acm.org> |
| 9 | |
Fred Drake | 6300bd4 | 2000-09-07 14:01:40 +0000 | [diff] [blame] | 10 | \versionadded{1.6} |
| 11 | |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 12 | The ZIP file format is a common archive and compression standard. |
| 13 | This module provides tools to create, read, write, append, and list a |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 14 | ZIP file. Any advanced use of this module will require an |
| 15 | understanding of the format, as defined in |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame^] | 16 | \citetitle[http://www.pkware.com/business_and_developers/developer/appnote/] |
| 17 | {PKZIP Application Note}. |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 18 | |
| 19 | This module does not currently handle ZIP files which have appended |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame^] | 20 | comments, or multi-disk ZIP files. It can handle ZIP files that use the |
| 21 | ZIP64 extensions (that is, ZIP files that are more than 4 GByte in size). |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 22 | |
| 23 | The available attributes of this module are: |
| 24 | |
| 25 | \begin{excdesc}{error} |
| 26 | The error raised for bad ZIP files. |
| 27 | \end{excdesc} |
| 28 | |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame^] | 29 | \begin{excdesc}{LargeZipFile} |
| 30 | The error raised when a ZIP file would require ZIP64 functionality but that |
| 31 | has not been enabled. |
| 32 | \end{excdesc} |
| 33 | |
Fred Drake | 96d7a70 | 2001-05-11 01:08:13 +0000 | [diff] [blame] | 34 | \begin{classdesc*}{ZipFile} |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 35 | The class for reading and writing ZIP files. See |
| 36 | ``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for |
| 37 | constructor details. |
Fred Drake | 886f113 | 2001-05-11 15:49:19 +0000 | [diff] [blame] | 38 | \end{classdesc*} |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 39 | |
Fred Drake | 96d7a70 | 2001-05-11 01:08:13 +0000 | [diff] [blame] | 40 | \begin{classdesc*}{PyZipFile} |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 41 | Class for creating ZIP archives containing Python libraries. |
Fred Drake | 886f113 | 2001-05-11 15:49:19 +0000 | [diff] [blame] | 42 | \end{classdesc*} |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 43 | |
| 44 | \begin{classdesc}{ZipInfo}{\optional{filename\optional{, date_time}}} |
Raymond Hettinger | 6880431 | 2005-01-01 00:28:46 +0000 | [diff] [blame] | 45 | Class used to represent information about a member of an archive. |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 46 | Instances of this class are returned by the \method{getinfo()} and |
Fred Drake | e35360f | 2000-10-03 15:16:31 +0000 | [diff] [blame] | 47 | \method{infolist()} methods of \class{ZipFile} objects. Most users |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 48 | of the \module{zipfile} module will not need to create these, but |
| 49 | only use those created by this module. |
| 50 | \var{filename} should be the full name of the archive member, and |
| 51 | \var{date_time} should be a tuple containing six fields which |
| 52 | describe the time of the last modification to the file; the fields |
| 53 | are described in section \ref{zipinfo-objects}, ``ZipInfo Objects.'' |
| 54 | \end{classdesc} |
| 55 | |
Fred Drake | 5d63a39 | 2000-10-06 15:29:56 +0000 | [diff] [blame] | 56 | \begin{funcdesc}{is_zipfile}{filename} |
Neal Norwitz | 6b35370 | 2002-04-09 18:15:00 +0000 | [diff] [blame] | 57 | Returns \code{True} if \var{filename} is a valid ZIP file based on its magic |
| 58 | number, otherwise returns \code{False}. This module does not currently |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 59 | handle ZIP files which have appended comments. |
| 60 | \end{funcdesc} |
| 61 | |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 62 | \begin{datadesc}{ZIP_STORED} |
Fred Drake | 5d63a39 | 2000-10-06 15:29:56 +0000 | [diff] [blame] | 63 | The numeric constant for an uncompressed archive member. |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 64 | \end{datadesc} |
| 65 | |
| 66 | \begin{datadesc}{ZIP_DEFLATED} |
| 67 | The numeric constant for the usual ZIP compression method. This |
| 68 | requires the \refmodule{zlib} module. No other compression methods are |
| 69 | currently supported. |
| 70 | \end{datadesc} |
| 71 | |
| 72 | |
| 73 | \begin{seealso} |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame^] | 74 | \seetitle[http://www.pkware.com/business_and_developers/developer/appnote/] |
| 75 | {PKZIP Application Note}{Documentation on the ZIP file format by |
| 76 | Phil Katz, the creator of the format and algorithms used.} |
Fred Drake | 58295de | 2000-09-30 00:11:45 +0000 | [diff] [blame] | 77 | |
| 78 | \seetitle[http://www.info-zip.org/pub/infozip/]{Info-ZIP Home Page}{ |
| 79 | Information about the Info-ZIP project's ZIP archive |
| 80 | programs and development libraries.} |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 81 | \end{seealso} |
| 82 | |
| 83 | |
| 84 | \subsection{ZipFile Objects \label{zipfile-objects}} |
| 85 | |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame^] | 86 | \begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression\optional{, allowZip64}}}} |
Fred Drake | bda3a59 | 2001-05-09 19:57:37 +0000 | [diff] [blame] | 87 | Open a ZIP file, where \var{file} can be either a path to a file |
Fred Drake | 907e76b | 2001-07-06 20:30:11 +0000 | [diff] [blame] | 88 | (a string) or a file-like object. The \var{mode} parameter |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 89 | should be \code{'r'} to read an existing file, \code{'w'} to |
| 90 | truncate and write a new file, or \code{'a'} to append to an |
Fred Drake | bda3a59 | 2001-05-09 19:57:37 +0000 | [diff] [blame] | 91 | existing file. If \var{mode} is \code{'a'} and \var{file} |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 92 | refers to an existing ZIP file, then additional files are added to |
Fred Drake | bda3a59 | 2001-05-09 19:57:37 +0000 | [diff] [blame] | 93 | it. If \var{file} does not refer to a ZIP file, then a new ZIP |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 94 | archive is appended to the file. This is meant for adding a ZIP |
| 95 | archive to another file, such as \file{python.exe}. Using |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 96 | |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 97 | \begin{verbatim} |
| 98 | cat myzip.zip >> python.exe |
| 99 | \end{verbatim} |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 100 | |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 101 | also works, and at least \program{WinZip} can read such files. |
| 102 | \var{compression} is the ZIP compression method to use when writing |
| 103 | the archive, and should be \constant{ZIP_STORED} or |
| 104 | \constant{ZIP_DEFLATED}; unrecognized values will cause |
Fred Drake | e35360f | 2000-10-03 15:16:31 +0000 | [diff] [blame] | 105 | \exception{RuntimeError} to be raised. If \constant{ZIP_DEFLATED} |
Thomas Heller | 3d62f8c | 2002-01-14 08:37:39 +0000 | [diff] [blame] | 106 | is specified but the \refmodule{zlib} module is not available, |
Fred Drake | e35360f | 2000-10-03 15:16:31 +0000 | [diff] [blame] | 107 | \exception{RuntimeError} is also raised. The default is |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 108 | \constant{ZIP_STORED}. |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame^] | 109 | If \var{allowZip64} is \code{True}, \module{zipfile} will create ZIP files that use |
| 110 | the ZIP64 extensions when the ZIP file is larger than 2 GB. If it is |
| 111 | false (the default), \module{zipfile} will raise an exception when the |
| 112 | ZIP file would require ZIP64 extensions. ZIP64 extensions are disabled by |
| 113 | default because the default \program{zip} and \program{unzip} commands on |
| 114 | \UNIX{} (the InfoZIP utilities) don't support these extensions. |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 115 | \end{classdesc} |
| 116 | |
Fred Drake | e35360f | 2000-10-03 15:16:31 +0000 | [diff] [blame] | 117 | \begin{methoddesc}{close}{} |
| 118 | Close the archive file. You must call \method{close()} before |
| 119 | exiting your program or essential records will not be written. |
| 120 | \end{methoddesc} |
| 121 | |
| 122 | \begin{methoddesc}{getinfo}{name} |
| 123 | Return a \class{ZipInfo} object with information about the archive |
| 124 | member \var{name}. |
| 125 | \end{methoddesc} |
| 126 | |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 127 | \begin{methoddesc}{infolist}{} |
| 128 | Return a list containing a \class{ZipInfo} object for each member of |
| 129 | the archive. The objects are in the same order as their entries in |
| 130 | the actual ZIP file on disk if an existing archive was opened. |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 131 | \end{methoddesc} |
| 132 | |
Fred Drake | 6fe9bac | 2000-10-11 18:56:00 +0000 | [diff] [blame] | 133 | \begin{methoddesc}{namelist}{} |
| 134 | Return a list of archive members by name. |
| 135 | \end{methoddesc} |
| 136 | |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 137 | \begin{methoddesc}{printdir}{} |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 138 | Print a table of contents for the archive to \code{sys.stdout}. |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 139 | \end{methoddesc} |
| 140 | |
| 141 | \begin{methoddesc}{read}{name} |
| 142 | Return the bytes of the file \var{name} in the archive. The archive must be |
| 143 | open for read or append. |
| 144 | \end{methoddesc} |
| 145 | |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 146 | \begin{methoddesc}{testzip}{} |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame^] | 147 | Read all the files in the archive and check their CRCs and file |
| 148 | headers. Return the name of the first bad file, or else return \code{None}. |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 149 | \end{methoddesc} |
| 150 | |
Fred Drake | e35360f | 2000-10-03 15:16:31 +0000 | [diff] [blame] | 151 | \begin{methoddesc}{write}{filename\optional{, arcname\optional{, |
| 152 | compress_type}}} |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 153 | Write the file named \var{filename} to the archive, giving it the |
Fred Drake | e35360f | 2000-10-03 15:16:31 +0000 | [diff] [blame] | 154 | archive name \var{arcname} (by default, this will be the same as |
Georg Brandl | 8f7c54e | 2006-02-20 08:40:38 +0000 | [diff] [blame] | 155 | \var{filename}, but without a drive letter and with leading path |
Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 156 | separators removed). If given, \var{compress_type} overrides the |
| 157 | value given for the \var{compression} parameter to the constructor |
| 158 | for the new entry. The archive must be open with mode \code{'w'} |
| 159 | or \code{'a'}. |
| 160 | |
| 161 | \note{There is no official file name encoding for ZIP files. |
| 162 | If you have Unicode file names, please convert them to byte strings |
| 163 | in your desired encoding before passing them to \method{write()}. |
| 164 | WinZip interprets all file names as encoded in CP437, also known |
| 165 | as DOS Latin.} |
| 166 | |
Georg Brandl | 8f7c54e | 2006-02-20 08:40:38 +0000 | [diff] [blame] | 167 | \note{Archive names should be relative to the archive root, that is, |
| 168 | they should not start with a path separator.} |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 169 | \end{methoddesc} |
| 170 | |
Just van Rossum | b083cb3 | 2002-12-12 12:23:32 +0000 | [diff] [blame] | 171 | \begin{methoddesc}{writestr}{zinfo_or_arcname, bytes} |
| 172 | Write the string \var{bytes} to the archive; \var{zinfo_or_arcname} |
| 173 | is either the file name it will be given in the archive, or a |
| 174 | \class{ZipInfo} instance. If it's an instance, at least the |
| 175 | filename, date, and time must be given. If it's a name, the date |
| 176 | and time are set to the current date and time. The archive must be |
| 177 | opened with mode \code{'w'} or \code{'a'}. |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 178 | \end{methoddesc} |
| 179 | |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 180 | |
| 181 | The following data attribute is also available: |
| 182 | |
| 183 | \begin{memberdesc}{debug} |
| 184 | The level of debug output to use. This may be set from \code{0} |
| 185 | (the default, no output) to \code{3} (the most output). Debugging |
| 186 | information is written to \code{sys.stdout}. |
| 187 | \end{memberdesc} |
| 188 | |
| 189 | |
| 190 | \subsection{PyZipFile Objects \label{pyzipfile-objects}} |
| 191 | |
| 192 | The \class{PyZipFile} constructor takes the same parameters as the |
| 193 | \class{ZipFile} constructor. Instances have one method in addition to |
| 194 | those of \class{ZipFile} objects. |
| 195 | |
| 196 | \begin{methoddesc}[PyZipFile]{writepy}{pathname\optional{, basename}} |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 197 | Search for files \file{*.py} and add the corresponding file to the |
| 198 | archive. The corresponding file is a \file{*.pyo} file if |
| 199 | available, else a \file{*.pyc} file, compiling if necessary. If the |
| 200 | pathname is a file, the filename must end with \file{.py}, and just |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 201 | the (corresponding \file{*.py[co]}) file is added at the top level |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 202 | (no path information). If it is a directory, and the directory is |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 203 | not a package directory, then all the files \file{*.py[co]} are |
Fred Drake | 3c9f936 | 2000-03-31 17:51:10 +0000 | [diff] [blame] | 204 | added at the top level. If the directory is a package directory, |
| 205 | then all \file{*.py[oc]} are added under the package name as a file |
| 206 | path, and if any subdirectories are package directories, all of |
| 207 | these are added recursively. \var{basename} is intended for |
| 208 | internal use only. The \method{writepy()} method makes archives |
| 209 | with file names like this: |
| 210 | |
| 211 | \begin{verbatim} |
| 212 | string.pyc # Top level name |
| 213 | test/__init__.pyc # Package directory |
| 214 | test/testall.pyc # Module test.testall |
| 215 | test/bogus/__init__.pyc # Subpackage directory |
| 216 | test/bogus/myfile.pyc # Submodule test.bogus.myfile |
| 217 | \end{verbatim} |
| 218 | \end{methoddesc} |
| 219 | |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 220 | |
| 221 | \subsection{ZipInfo Objects \label{zipinfo-objects}} |
| 222 | |
| 223 | Instances of the \class{ZipInfo} class are returned by the |
Fred Drake | e35360f | 2000-10-03 15:16:31 +0000 | [diff] [blame] | 224 | \method{getinfo()} and \method{infolist()} methods of |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 225 | \class{ZipFile} objects. Each object stores information about a |
| 226 | single member of the ZIP archive. |
| 227 | |
| 228 | Instances have the following attributes: |
| 229 | |
| 230 | \begin{memberdesc}[ZipInfo]{filename} |
| 231 | Name of the file in the archive. |
| 232 | \end{memberdesc} |
| 233 | |
| 234 | \begin{memberdesc}[ZipInfo]{date_time} |
Raymond Hettinger | 999b57c | 2003-08-25 04:28:05 +0000 | [diff] [blame] | 235 | The time and date of the last modification to the archive |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 236 | member. This is a tuple of six values: |
| 237 | |
| 238 | \begin{tableii}{c|l}{code}{Index}{Value} |
| 239 | \lineii{0}{Year} |
| 240 | \lineii{1}{Month (one-based)} |
| 241 | \lineii{2}{Day of month (one-based)} |
| 242 | \lineii{3}{Hours (zero-based)} |
| 243 | \lineii{4}{Minutes (zero-based)} |
| 244 | \lineii{5}{Seconds (zero-based)} |
| 245 | \end{tableii} |
| 246 | \end{memberdesc} |
| 247 | |
| 248 | \begin{memberdesc}[ZipInfo]{compress_type} |
| 249 | Type of compression for the archive member. |
| 250 | \end{memberdesc} |
| 251 | |
| 252 | \begin{memberdesc}[ZipInfo]{comment} |
| 253 | Comment for the individual archive member. |
| 254 | \end{memberdesc} |
| 255 | |
| 256 | \begin{memberdesc}[ZipInfo]{extra} |
| 257 | Expansion field data. The |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame^] | 258 | \citetitle[http://www.pkware.com/business_and_developers/developer/appnote/] |
| 259 | {PKZIP Application Note} contains some comments on the internal |
| 260 | structure of the data contained in this string. |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 261 | \end{memberdesc} |
| 262 | |
| 263 | \begin{memberdesc}[ZipInfo]{create_system} |
| 264 | System which created ZIP archive. |
| 265 | \end{memberdesc} |
| 266 | |
| 267 | \begin{memberdesc}[ZipInfo]{create_version} |
| 268 | PKZIP version which created ZIP archive. |
| 269 | \end{memberdesc} |
| 270 | |
| 271 | \begin{memberdesc}[ZipInfo]{extract_version} |
| 272 | PKZIP version needed to extract archive. |
| 273 | \end{memberdesc} |
| 274 | |
| 275 | \begin{memberdesc}[ZipInfo]{reserved} |
| 276 | Must be zero. |
| 277 | \end{memberdesc} |
| 278 | |
| 279 | \begin{memberdesc}[ZipInfo]{flag_bits} |
| 280 | ZIP flag bits. |
| 281 | \end{memberdesc} |
| 282 | |
| 283 | \begin{memberdesc}[ZipInfo]{volume} |
| 284 | Volume number of file header. |
| 285 | \end{memberdesc} |
| 286 | |
| 287 | \begin{memberdesc}[ZipInfo]{internal_attr} |
| 288 | Internal attributes. |
| 289 | \end{memberdesc} |
| 290 | |
| 291 | \begin{memberdesc}[ZipInfo]{external_attr} |
| 292 | External file attributes. |
| 293 | \end{memberdesc} |
| 294 | |
| 295 | \begin{memberdesc}[ZipInfo]{header_offset} |
| 296 | Byte offset to the file header. |
| 297 | \end{memberdesc} |
| 298 | |
Fred Drake | 4278024 | 2000-10-02 20:56:30 +0000 | [diff] [blame] | 299 | \begin{memberdesc}[ZipInfo]{CRC} |
| 300 | CRC-32 of the uncompressed file. |
| 301 | \end{memberdesc} |
| 302 | |
| 303 | \begin{memberdesc}[ZipInfo]{compress_size} |
| 304 | Size of the compressed data. |
| 305 | \end{memberdesc} |
| 306 | |
| 307 | \begin{memberdesc}[ZipInfo]{file_size} |
| 308 | Size of the uncompressed file. |
| 309 | \end{memberdesc} |