blob: a05a9f446b8c60c815a19afcafe9728fa839b39c [file] [log] [blame]
1\section{\module{tarfile} --- Read and write tar archive files}
2
3\declaremodule{standard}{tarfile}
4\modulesynopsis{Read and write tar-format archive files.}
5\versionadded{2.3}
6
7\moduleauthor{Lars Gust\"abel}{lars@gustaebel.de}
8\sectionauthor{Lars Gust\"abel}{lars@gustaebel.de}
9
10The \module{tarfile} module makes it possible to read and create tar archives.
11Some facts and figures:
12
13\begin{itemize}
14\item reads and writes \module{gzip} and \module{bzip2} compressed archives.
15\item creates POSIX 1003.1-1990 compliant or GNU tar compatible archives.
16\item reads GNU tar extensions \emph{longname}, \emph{longlink} and
17 \emph{sparse}.
18\item stores pathnames of unlimited length using GNU tar extensions.
19\item handles directories, regular files, hardlinks, symbolic links, fifos,
20 character devices and block devices and is able to acquire and
21 restore file information like timestamp, access permissions and owner.
22\item can handle tape devices.
23\end{itemize}
24
25\begin{funcdesc}{open}{\optional{name\optional{, mode
26 \optional{, fileobj\optional{, bufsize}}}}}
27 Return a \class{TarFile} object for the pathname \var{name}.
28 For detailed information on \class{TarFile} objects,
29 see \citetitle{TarFile Objects} (section \ref{tarfile-objects}).
30
31 \var{mode} has to be a string of the form \code{'filemode[:compression]'}
32 and defaults to \code{'r'}. Here is a full list of mode combinations:
33
34 \begin{tableii}{c|l}{code}{mode}{action}
35 \lineii{'r'}{Open for reading with transparent compression (recommended).}
36 \lineii{'r:'}{Open for reading exclusively without compression.}
37 \lineii{'r:gz'}{Open for reading with gzip compression.}
38 \lineii{'r:bz2'}{Open for reading with bzip2 compression.}
39 \lineii{'a' or 'a:'}{Open for appending with no compression.}
40 \lineii{'w' or 'w:'}{Open for uncompressed writing.}
41 \lineii{'w:gz'}{Open for gzip compressed writing.}
42 \lineii{'w:bz2'}{Open for bzip2 compressed writing.}
43 \end{tableii}
44
45 Note that \code{'a:gz'} or \code{'a:bz2'} is not possible.
46 If \var{mode} is not suitable to open a certain (compressed) file for
47 reading, \exception{ReadError} is raised. Use \var{mode} \code{'r'} to
48 avoid this. If a compression method is not supported,
49 \exception{CompressionError} is raised.
50
51 If \var{fileobj} is specified, it is used as an alternative to
52 a file object opened for \var{name}.
53
54 For special purposes, there is a second format for \var{mode}:
55 \code{'filemode|[compression]'}. \code{open} will return a \class{TarFile}
56 object that processes its data as a stream of blocks. No random
57 seeking will be done on the file. If given, \var{fileobj} may be any
58 object that has a \code{read()} or \code{write()} method (depending on the \var{mode}).
59 \var{bufsize} specifies the blocksize and defaults to \code{20 * 512}
60 bytes. Use this variant in combination with e.g. \code{sys.stdin}, a socket
61 file object or a tape device.
62 However, such a \class{TarFile} object is limited in that it does not allow
63 random access; see \citetitle{Examples} (section
64 \ref{tar-examples}).
65 The currently possible modes:
66
67 \begin{tableii}{c|l}{code}{mode}{action}
68 \lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
69 \lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
70 \lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
71 \lineii{'w|'}{Open an uncompressed \emph{stream} for writing.}
72 \lineii{'w|gz'}{Open a gzip compressed \emph{stream} for writing.}
73 \lineii{'w|bz2'}{Open a bzip2 compressed \emph{stream} for writing.}
74 \end{tableii}
75\end{funcdesc}
76
77\begin{classdesc*}{TarFile}
78 Class for reading and writing tar archives. Do not use this
79 class directly; use \function{open()} instead.
80 See \citetitle{TarFile Objects} (section \ref{tarfile-objects}).
81\end{classdesc*}
82
83\begin{funcdesc}{is_tarfile}{name}
84 Return \code{True} if \var{name} is a tar archive file that the
85 \module{tarfile} module can read.
86\end{funcdesc}
87
88\begin{classdesc}{TarFileCompat}{filename\optional{, mode\optional{,
89 compression}}}
90
91 Class for limited access to tar archives with a \module{zipfile}-like
92 interface. Please consult the documentation of \module{zipfile} for more
93 details.
94 \var{compression} must be one of the following constants:
95 \begin{datadesc}{TAR_PLAIN}
96 Constant for an uncompressed tar archive.
97 \end{datadesc}
98 \begin{datadesc}{TAR_GZIPPED}
99 Constant for a \code{gzip} compressed tar archive.
100 \end{datadesc}
101\end{classdesc}
102
103\begin{excdesc}{TarError}
104 Base class for all \module{tarfile} exceptions.
105\end{excdesc}
106
107\begin{excdesc}{ReadError}
108 Is raised when a tar archive is opened that either cannot be handled by
109 the \module{tarfile} module or is somehow invalid.
110\end{excdesc}
111
112\begin{excdesc}{CompressionError}
113 Is raised when a compression method is not supported or when the data
114 cannot be decoded properly.
115\end{excdesc}
116
117\begin{excdesc}{StreamError}
118 Is raised for the limitations that are typical for stream-like
119 \class{TarFile} objects.
120\end{excdesc}
121
122\begin{excdesc}{ExtractError}
123 Is raised for \emph{non-fatal} errors when using \method{extract()}, but
124 only if \member{TarFile.errorlevel}\code{ == 2}.
125\end{excdesc}
126
127\begin{seealso}
128 \seemodule[module-zipfile]{zipfile}{Documentation of the \code{zipfile}
129 standard module.}
130
131 \seetitle[http://www.gnu.org/manual/tar/html_chapter/tar_8.html\#SEC118]
132 {GNU tar manual, Standard Section}{Documentation for tar archive files,
133 including GNU tar extensions.}
134\end{seealso}
135
136%-----------------
137% TarFile Objects
138%-----------------
139
140\subsection{TarFile Objects \label{tarfile-objects}}
141
142The \class{TarFile} object provides an interface to a tar archive. A tar
143archive is a sequence of blocks. An archive member (a stored file) is made up
144of a header block followed by data blocks. It is possible to store a file in a
145tar archive several times. Each archive member is represented by a
146\class{TarInfo} object, see \citetitle{TarInfo Objects} (section
147\ref{tarinfo-objects}) for details.
148
149\begin{classdesc}{TarFile}{\optional{name
150 \optional{, mode\optional{, fileobj}}}}
151 Open an \emph{(uncompressed)} tar archive \var{name}.
152 \var{mode} is either \code{'r'} to read from an existing archive,
153 \code{'a'} to append data to an existing file or \code{'w'} to create a new
154 file overwriting an existing one. \var{mode} defaults to \code{'r'}.
155
156 If \var{fileobj} is given, it is used for reading or writing data.
157 If it can be determined, \var{mode} is overridden by \var{fileobj}'s mode.
158 \begin{notice}
159 \var{fileobj} is not closed when \class{TarFile} is closed.
160 \end{notice}
161\end{classdesc}
162
163\begin{methoddesc}{open}{...}
164 Alternative constructor. The \function{open()} function on module level is
165 actually a shortcut to this classmethod. See section \ref{module-tarfile}
166 for details.
167\end{methoddesc}
168
169\begin{methoddesc}{getmember}{name}
170 Return a \class{TarInfo} object for member \var{name}. If \var{name} cannot
171 be found in the archive, \exception{KeyError} is raised.
172 \begin{notice}
173 If a member occurs more than once in the archive, its last
174 occurrence is assumed to be the most up-to-date version.
175 \end{notice}
176\end{methoddesc}
177
178\begin{methoddesc}{getmembers}{}
179 Return the members of the archive as a list of \class{TarInfo} objects.
180 The list has the same order as the members in the archive.
181\end{methoddesc}
182
183\begin{methoddesc}{getnames}{}
184 Return the members as a list of their names. It has the same order as
185 the list returned by \method{getmembers()}.
186\end{methoddesc}
187
188\begin{methoddesc}{list}{verbose=True}
189 Print a table of contents to \code{sys.stdout}. If \var{verbose} is
190 \code{False}, only the names of the members are printed. If it is
191 \code{True}, an \code{"ls -l"}-like output is produced.
192\end{methoddesc}
193
194\begin{methoddesc}{next}{}
195 Return the next member of the archive as a \class{TarInfo} object when
196 \class{TarFile} is opened for reading. Return \code{None} if there are no
197 more members available.
198\end{methoddesc}
199
200\begin{methoddesc}{extract}{member\optional{, path}}
201 Extract a member from the archive to the current working directory,
202 using its full name. Its file information is extracted as accurately as
203 possible.
204 \var{member} may be a filename or a \class{TarInfo} object.
205 You can specify a different directory using \var{path}.
206\end{methoddesc}
207
208\begin{methoddesc}{extractfile}{member}
209 Extract a member from the archive as a file object.
210 \var{member} may be a filename or a \class{TarInfo} object.
211 If \var{member} is a regular file, a file-like object is returned.
212 If \var{member} is a link, a file-like object is constructed from the
213 link's target.
214 If \var{member} is none of the above, \code{None} is returned.
215 \begin{notice}
216 The file-like object is read-only and provides the following methods:
217 \method{read()}, \method{readline()}, \method{readlines()},
218 \method{seek()}, \method{tell()}.
219 \end{notice}
220\end{methoddesc}
221
222\begin{methoddesc}{add}{name\optional{, arcname\optional{, recursive=True}}}
223 Add the file \var{name} to the archive. \var{name} may be any type
224 of file (directory, fifo, symbolic link, etc.).
225 If given, \var{arcname} specifies an alternative name for the file in the
226 archive. Directories are added recursively by default.
227 This can be avoided by setting \var{recursive} to \code{False}.
228\end{methoddesc}
229
230\begin{methoddesc}{addfile}{tarinfo\optional{, fileobj}}
231 Add the \class{TarInfo} object \var{tarinfo} to the archive.
232 If \var{fileobj} is given, \code{tarinfo.size} bytes are read
233 from it and added to the archive. You can create \class{TarInfo} objects
234 using \method{gettarinfo()}.
235 \begin{notice}
236 On Windows platforms, \var{fileobj} should always be opened with mode
237 \code{'rb'} to avoid confusion about the file size.
238 \end{notice}
239\end{methoddesc}
240
241\begin{methoddesc}{gettarinfo}{\optional{name\optional{, arcname
242 \optional{, fileobj}}}}
243 Create a \class{TarInfo} object for either the file \var{name} or the
244 file object \var{fileobj} (using \code{os.fstat()} on its file descriptor).
245 You can modify some of the \class{TarInfo}'s attributes before you add it
246 using \method{addfile()}.
247 If given, \var{arcname} specifies an alternative name for the file in the
248 archive.
249\end{methoddesc}
250
251\begin{methoddesc}{close}{}
252 Close the \class{TarFile}. In write-mode, two finishing zero blocks are
253 appended to the archive.
254\end{methoddesc}
255
256\begin{memberdesc}{posix=True}
257 If \code{True}, create a POSIX 1003.1-1990 compliant archive. GNU
258 extensions are not used, because they are not part of the POSIX standard.
259 This limits the length of filenames to at most 256 and linknames to 100
260 characters. A \exception{ValueError} is raised, if a pathname exceeds this
261 limit.
262 If \code{False}, create a GNU tar compatible archive. It will not be POSIX
263 compliant, but can store pathnames of unlimited length.
264\end{memberdesc}
265
266\begin{memberdesc}{dereference=False}
267 If \code{False}, add symbolic and hard links to archive. If \code{True},
268 add the content of the target files to the archive. This has no effect on
269 systems that do not support links.
270\end{memberdesc}
271
272\begin{memberdesc}{ignore_zeros=False}
273 If \code{False}, treat an empty block as the end of the archive. If
274 \code{True}, skip empty (and invalid) blocks and try to get as many
275 members as possible. This is only useful for concatenated or damaged
276 archives.
277\end{memberdesc}
278
279\begin{memberdesc}{debug=0}
280 To be set from \code{0} (no debug messages) up to \code{3} (all debug
281 messages). The messages are written to \code{sys.stdout}.
282\end{memberdesc}
283
284\begin{memberdesc}{errorlevel=0}
285 If \code{0}, all errors are ignored when using \method{extract()}.
286 Nevertheless, they appear as error messages in the debug output, when
287 debugging is enabled.
288 If \code{1}, all \emph{fatal} errors are raised as \exception{OSError}
289 or \exception{IOError} exceptions.
290 If \code{2}, all \emph{non-fatal} errors are raised as \exception{TarError}
291 exceptions as well.
292\end{memberdesc}
293
294%-----------------
295% TarInfo Objects
296%-----------------
297
298\subsection{TarInfo Objects \label{tarinfo-objects}}
299
300A \class{TarInfo} object represents one member in a \class{TarFile}. Aside from
301storing all required attributes of a file (like file type, size, time,
302permissions, owner etc.), it provides some useful methods to determine its
303type. It does \emph{not} contain the file's data itself.
304
305\class{TarInfo} objects are returned by \class{TarFile}'s methods
306\method{getmember()}, \method{getmembers()} and \method{gettarinfo()}.
307
308\begin{classdesc}{TarInfo}{\optional{name}}
309 Create a \class{TarInfo} object.
310\end{classdesc}
311
312\begin{methoddesc}{frombuf}{}
313 Create and return a \class{TarInfo} object from a string buffer.
314\end{methoddesc}
315
316\begin{methoddesc}{tobuf}{}
317 Create a string buffer from a \class{TarInfo} object.
318\end{methoddesc}
319
320A \class{TarInfo} object has the following public data attributes:
321\begin{memberdesc}{name}
322 Name of the archive member.
323\end{memberdesc}
324
325\begin{memberdesc}{size}
326 Size in bytes.
327\end{memberdesc}
328
329\begin{memberdesc}{mtime}
330 Time of last modification.
331\end{memberdesc}
332
333\begin{memberdesc}{mode}
334 Permission bits.
335\end{memberdesc}
336
337\begin{memberdesc}{type}
338 File type.
339 \var{type} is usually one of these constants:
340 \code{REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, CONTTYPE,
341 CHRTYPE, BLKTYPE, GNUTYPE_SPARSE}.
342 To determine the type of a \class{TarInfo} object more conveniently, use
343 the \code{is_*()} methods below.
344\end{memberdesc}
345
346\begin{memberdesc}{linkname}
347 Name of the target file, which is only present in \class{TarInfo}
348 objects of type \constant{LNKTYPE} and \constant{SYMTYPE}.
349\end{memberdesc}
350
351\begin{memberdesc}{uid, gid}
352 User and group ID of the user who originally stored this member.
353\end{memberdesc}
354
355\begin{memberdesc}{uname, gname}
356 User and group name.
357\end{memberdesc}
358
359A \class{TarInfo} object also provides some convenient query methods:
360\begin{methoddesc}{isfile}{}
361 Return \code{True} if the \class{TarInfo} object is a regular file.
362\end{methoddesc}
363
364\begin{methoddesc}{isreg}{}
365 Same as \method{isfile()}.
366\end{methoddesc}
367
368\begin{methoddesc}{isdir}{}
369 Return \code{True} if it is a directory.
370\end{methoddesc}
371
372\begin{methoddesc}{issym}{}
373 Return \code{True} if it is a symbolic link.
374\end{methoddesc}
375
376\begin{methoddesc}{islnk}{}
377 Return \code{True} if it is a hard link.
378\end{methoddesc}
379
380\begin{methoddesc}{ischr}{}
381 Return \code{True} if it is a character device.
382\end{methoddesc}
383
384\begin{methoddesc}{isblk}{}
385 Return \code{True} if it is a block device.
386\end{methoddesc}
387
388\begin{methoddesc}{isfifo}{}
389 Return \code{True} if it is a FIFO.
390\end{methoddesc}
391
392\begin{methoddesc}{isdev}{}
393 Return \code{True} if it is one of character device, block device or FIFO.
394\end{methoddesc}
395
396%------------------------
397% Examples
398%------------------------
399
400\subsection{Examples \label{tar-examples}}
401
402How to create an uncompressed tar archive from a list of filenames:
403\begin{verbatim}
404import tarfile
405tar = tarfile.open("sample.tar", "w")
406for name in ["foo", "bar", "quux"]:
407 tar.add(name)
408tar.close()
409\end{verbatim}
410
411How to read a gzip compressed tar archive and display some member information:
412\begin{verbatim}
413import tarfile
414tar = tarfile.open("sample.tar.gz", "r:gz")
415for tarinfo in tar:
416 print tarinfo.name, "is", tarinfo.size, "bytes in size and is",
417 if tarinfo.isreg():
418 print "a regular file."
419 elif tarinfo.isdir():
420 print "a directory."
421 else:
422 print "something else."
423tar.close()
424\end{verbatim}
425
426How to create a tar archive with faked information:
427\begin{verbatim}
428import tarfile
429tar = tarfile.open("sample.tar.gz", "w:gz")
430for name in namelist:
431 tarinfo = tar.gettarinfo(name, "fakeproj-1.0/" + name)
432 tarinfo.uid = 123
433 tarinfo.gid = 456
434 tarinfo.uname = "johndoe"
435 tarinfo.gname = "fake"
436 tar.addfile(tarinfo, file(name, "rb"))
437tar.close()
438\end{verbatim}
439
440The \emph{only} way to extract an uncompressed tar stream from
441\code{sys.stdin}:
442\begin{verbatim}
443import sys
444import tarfile
445tar = tarfile.open(mode="r|", fileobj=sys.stdin)
446for tarinfo in tar:
447 tar.extract(tarinfo)
448tar.close()
449\end{verbatim}
450