blob: d836500672f5ae4c7c6170e76c87eac13f111f7f [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001\section{\module{bz2} ---
2 Compression compatible with \program{bzip2}}
3
4\declaremodule{builtin}{bz2}
5\modulesynopsis{Interface to compression and decompression
6 routines compatible with \program{bzip2}.}
7\moduleauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
8\sectionauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
9
10\versionadded{2.3}
11
12This module provides a comprehensive interface for the bz2 compression library.
13It implements a complete file interface, one-shot (de)compression functions,
14and types for sequential (de)compression.
15
16Here is a summary of the features offered by the \module{bz2} module:
17
18\begin{itemize}
19\item \class{BZ2File} class implements a complete file interface, including
Fred Drake3a2cda82002-11-15 16:38:06 +000020 \method{readline()}, \method{readlines()},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000021 \method{writelines()}, \method{seek()}, etc.;
22\item \class{BZ2File} class implements emulated \method{seek()} support;
23\item \class{BZ2File} class implements universal newline support;
24\item \class{BZ2File} class offers an optimized line iteration using
25 the readahead algorithm borrowed from file objects;
Neal Norwitz110aa502002-11-05 23:55:27 +000026\item \class{BZ2File} class inherits from the built-in \class{file} type
Fred Drake18c7d982002-11-05 17:54:02 +000027 (\code{issubclass(BZ2File, file)} is \code{True});
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000028\item Sequential (de)compression supported by \class{BZ2Compressor} and
29 \class{BZ2Decompressor} classes;
30\item One-shot (de)compression supported by \function{compress()} and
31 \function{decompress()} functions;
32\item Thread safety is provided through an individual locking mechanism;
33\item Complete inline documentation.
34\end{itemize}
35
36
37\subsection{(De)compression of files}
38
39Handling of compressed files is offered by the \class{BZ2File} class.
40
Fred Drake18c7d982002-11-05 17:54:02 +000041\begin{classdesc}{BZ2File}{filename\optional{, mode\optional{,
42 buffering\optional{, compresslevel}}}}
43Open a bz2 file. The \var{mode} argument can be either \code{'r'} or \code{'w'}, for reading
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000044(default) or writing. When opened for writing, the file will be created if
Fred Drake18c7d982002-11-05 17:54:02 +000045it doesn't exist, and truncated otherwise. If \var{buffering} is given,
46\code{0} means unbuffered, and larger numbers specify the buffer size;
47the default is \code{0}. If
Gustavo Niemeyer057ab142002-11-25 18:51:43 +000048\var{compresslevel} is given, it must be a number between \code{1} and
Fred Drake18c7d982002-11-05 17:54:02 +000049\code{9}; the default is \code{9}.
Fred Drake3a2cda82002-11-15 16:38:06 +000050Add a \character{U} to \var{mode} to open the file for input with universal newline
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000051support. Any line ending in the input file will be seen as a
Fred Drake3a2cda82002-11-15 16:38:06 +000052\character{\e n} in Python. Also, a file so opened gains the
53attribute \member{newlines}; the value for this attribute is one of
54\code{None} (no newline read yet), \code{'\e r'}, \code{'\e n'},
55\code{'\e r\e n'} or a tuple containing all the newline types
56seen. Universal newlines are available only when reading.
57Instances support iteration in the same way as normal \class{file}
58instances.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000059\end{classdesc}
60
61\begin{methoddesc}[BZ2File]{close}{}
62Close the file. Sets data attribute \member{closed} to true. A closed file
63cannot be used for further I/O operations. \method{close()} may be called
64more than once without error.
65\end{methoddesc}
66
67\begin{methoddesc}[BZ2File]{read}{\optional{size}}
68Read at most \var{size} uncompressed bytes, returned as a string. If the
69\var{size} argument is negative or omitted, read until EOF is reached.
70\end{methoddesc}
71
72\begin{methoddesc}[BZ2File]{readline}{\optional{size}}
73Return the next line from the file, as a string, retaining newline.
74A non-negative \var{size} argument limits the maximum number of bytes to
75return (an incomplete line may be returned then). Return an empty
76string at EOF.
77\end{methoddesc}
78
79\begin{methoddesc}[BZ2File]{readlines}{\optional{size}}
80Return a list of lines read. The optional \var{size} argument, if given,
81is an approximate bound on the total number of bytes in the lines returned.
82\end{methoddesc}
83
84\begin{methoddesc}[BZ2File]{xreadlines}{}
85For backward compatibility. \class{BZ2File} objects now include the
Fred Drake3a2cda82002-11-15 16:38:06 +000086performance optimizations previously implemented in the
87\refmodule{xreadlines} module.
88\deprecated{2.3}{This exists only for compatibility with the method by
89 this name on \class{file} objects, which is
90 deprecated. Use \code{for line in file} instead.}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000091\end{methoddesc}
92
Fred Drake18c7d982002-11-05 17:54:02 +000093\begin{methoddesc}[BZ2File]{seek}{offset\optional{, whence}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000094Move to new file position. Argument \var{offset} is a byte count. Optional
95argument \var{whence} defaults to \code{0} (offset from start of file,
96offset should be \code{>= 0}); other values are \code{1} (move relative to
97current position, positive or negative), and \code{2} (move relative to end
98of file, usually negative, although many platforms allow seeking beyond
99the end of a file).
100
101Note that seeking of bz2 files is emulated, and depending on the parameters
102the operation may be extremely slow.
103\end{methoddesc}
104
105\begin{methoddesc}[BZ2File]{tell}{}
106Return the current file position, an integer (may be a long integer).
107\end{methoddesc}
108
109\begin{methoddesc}[BZ2File]{write}{data}
110Write string \var{data} to file. Note that due to buffering, \method{close()}
111may be needed before the file on disk reflects the data written.
112\end{methoddesc}
113
114\begin{methoddesc}[BZ2File]{writelines}{sequence_of_strings}
115Write the sequence of strings to the file. Note that newlines are not added.
116The sequence can be any iterable object producing strings. This is equivalent
117to calling \method{write()} for each string.
118\end{methoddesc}
119
120
121\subsection{Sequential (de)compression}
122
123Sequential compression and decompression are done using the classes
124\class{BZ2Compressor} and \class{BZ2Decompressor}.
125
Fred Drake18c7d982002-11-05 17:54:02 +0000126\begin{classdesc}{BZ2Compressor}{\optional{compresslevel}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000127Create a new compressor object. This object may be used to compress
128data sequentially. If you want to compress data in one shot, use the
129\function{compress()} function instead. The \var{compresslevel} parameter,
Fred Drake18c7d982002-11-05 17:54:02 +0000130if given, must be a number between \code{1} and \code{9}; the default
131is \code{9}.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000132\end{classdesc}
133
134\begin{methoddesc}[BZ2Compressor]{compress}{data}
135Provide more data to the compressor object. It will return chunks of compressed
136data whenever possible. When you've finished providing data to compress, call
137the \method{flush()} method to finish the compression process, and return what
138is left in internal buffers.
139\end{methoddesc}
140
141\begin{methoddesc}[BZ2Compressor]{flush}{}
142Finish the compression process and return what is left in internal buffers. You
143must not use the compressor object after calling this method.
144\end{methoddesc}
145
146\begin{classdesc}{BZ2Decompressor}{}
147Create a new decompressor object. This object may be used to decompress
148data sequentially. If you want to decompress data in one shot, use the
149\function{decompress()} function instead.
150\end{classdesc}
151
152\begin{methoddesc}[BZ2Decompressor]{decompress}{data}
153Provide more data to the decompressor object. It will return chunks of
154decompressed data whenever possible. If you try to decompress data after the
155end of stream is found, \exception{EOFError} will be raised. If any data was
156found after the end of stream, it will be ignored and saved in the
157\member{unused\_data} attribute.
158\end{methoddesc}
159
160
161\subsection{One-shot (de)compression}
162
Neal Norwitz110aa502002-11-05 23:55:27 +0000163One-shot compression and decompression are provided through the
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000164\function{compress()} and \function{decompress()} functions.
165
Fred Drake18c7d982002-11-05 17:54:02 +0000166\begin{funcdesc}{compress}{data\optional{, compresslevel}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167Compress \var{data} in one shot. If you want to compress data sequentially,
168use an instance of \class{BZ2Compressor} instead. The \var{compresslevel}
Fred Drake18c7d982002-11-05 17:54:02 +0000169parameter, if given, must be a number between \code{1} and \code{9};
170the default is \code{9}.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000171\end{funcdesc}
172
Fred Drake18c7d982002-11-05 17:54:02 +0000173\begin{funcdesc}{decompress}{data}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000174Decompress \var{data} in one shot. If you want to decompress data
175sequentially, use an instance of \class{BZ2Decompressor} instead.
176\end{funcdesc}