\section{\module{bz2} ---
         Compression compatible with \program{bzip2}}
3
\declaremodule{builtin}{bz2}
\modulesynopsis{Interface to compression and decompression
                routines compatible with \program{bzip2}.}
\moduleauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
\sectionauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
9
\versionadded{2.3}
11
This module provides a comprehensive interface for the bz2 compression library.
It implements a complete file interface, one-shot (de)compression functions,
and types for sequential (de)compression.
15
Here is a summary of the features offered by the \module{bz2} module:
17
\begin{itemize}
\item The \class{BZ2File} class implements a complete file interface,
      including \method{readline()}, \method{readlines()},
      \method{writelines()}, \method{seek()}, etc.;
\item The \class{BZ2File} class implements emulated \method{seek()} support;
\item The \class{BZ2File} class implements universal newline support;
\item The \class{BZ2File} class offers optimized line iteration using
      the readahead algorithm borrowed from file objects;
\item Sequential (de)compression is supported by the \class{BZ2Compressor}
      and \class{BZ2Decompressor} classes;
\item One-shot (de)compression is supported by the \function{compress()}
      and \function{decompress()} functions;
\item Thread safety is provided by an individual locking mechanism;
\item Complete inline documentation is included.
\end{itemize}
33
34
\subsection{(De)compression of files}

Handling of compressed files is offered by the \class{BZ2File} class.
38
\begin{classdesc}{BZ2File}{filename\optional{, mode\optional{,
                           buffering\optional{, compresslevel}}}}
Open a bz2 file. The \var{mode} argument can be either \code{'r'} or
\code{'w'}, for reading (default) or writing. When opened for writing, the
file will be created if it doesn't exist, and truncated otherwise. If
\var{buffering} is given, \code{0} means unbuffered, and larger numbers
specify the buffer size; the default is \code{0}. If
\var{compresslevel} is given, it must be a number between \code{1} and
\code{9}; the default is \code{9}.
Add a \character{U} to \var{mode} to open the file for input with universal
newline support. Any line ending in the input file will be seen as a
\character{\e n} in Python. Also, a file so opened gains the
attribute \member{newlines}; the value for this attribute is one of
\code{None} (no newline read yet), \code{'\e r'}, \code{'\e n'},
\code{'\e r\e n'} or a tuple containing all the newline types
seen. Universal newlines are available only when reading.
Instances support iteration in the same way as normal \class{file}
instances.
\end{classdesc}
58
\begin{methoddesc}[BZ2File]{close}{}
Close the file. Sets data attribute \member{closed} to true. A closed file
cannot be used for further I/O operations. \method{close()} may be called
more than once without error.
\end{methoddesc}
64
\begin{methoddesc}[BZ2File]{read}{\optional{size}}
Read at most \var{size} uncompressed bytes, returned as a string. If the
\var{size} argument is negative or omitted, read until EOF is reached.
\end{methoddesc}
69
\begin{methoddesc}[BZ2File]{readline}{\optional{size}}
Return the next line from the file, as a string, retaining newline.
A non-negative \var{size} argument limits the maximum number of bytes to
return (an incomplete line may be returned then). Return an empty
string at EOF.
\end{methoddesc}
76
\begin{methoddesc}[BZ2File]{readlines}{\optional{size}}
Return a list of lines read. The optional \var{size} argument, if given,
is an approximate bound on the total number of bytes in the lines returned.
\end{methoddesc}
81
\begin{methoddesc}[BZ2File]{xreadlines}{}
For backward compatibility. \class{BZ2File} objects now include the
performance optimizations previously implemented in the
\refmodule{xreadlines} module.
\deprecated{2.3}{This exists only for compatibility with the method by
                 this name on \class{file} objects, which is
                 deprecated. Use \code{for line in file} instead.}
\end{methoddesc}
90
\begin{methoddesc}[BZ2File]{seek}{offset\optional{, whence}}
Move to new file position. Argument \var{offset} is a byte count. Optional
argument \var{whence} defaults to \code{0} (offset from start of file,
offset should be \code{>= 0}); other values are \code{1} (move relative to
current position, positive or negative), and \code{2} (move relative to end
of file, usually negative, although many platforms allow seeking beyond
the end of a file).

Note that seeking of bz2 files is emulated, and depending on the parameters
the operation may be extremely slow.
\end{methoddesc}
102
\begin{methoddesc}[BZ2File]{tell}{}
Return the current file position, an integer (may be a long integer).
\end{methoddesc}
106
\begin{methoddesc}[BZ2File]{write}{data}
Write string \var{data} to file. Note that due to buffering, \method{close()}
may be needed before the file on disk reflects the data written.
\end{methoddesc}
111
\begin{methoddesc}[BZ2File]{writelines}{sequence_of_strings}
Write the sequence of strings to the file. Note that newlines are not added.
The sequence can be any iterable object producing strings. This is equivalent
to calling \method{write()} for each string.
\end{methoddesc}
117
118
\subsection{Sequential (de)compression}

Sequential compression and decompression is done using the classes
\class{BZ2Compressor} and \class{BZ2Decompressor}.
123
\begin{classdesc}{BZ2Compressor}{\optional{compresslevel}}
Create a new compressor object. This object may be used to compress
data sequentially. If you want to compress data in one shot, use the
\function{compress()} function instead. The \var{compresslevel} parameter,
if given, must be a number between \code{1} and \code{9}; the default
is \code{9}.
\end{classdesc}
131
\begin{methoddesc}[BZ2Compressor]{compress}{data}
Provide more data to the compressor object. It will return chunks of compressed
data whenever possible. When you've finished providing data to compress, call
the \method{flush()} method to finish the compression process, and return what
is left in internal buffers.
\end{methoddesc}
138
\begin{methoddesc}[BZ2Compressor]{flush}{}
Finish the compression process and return what is left in internal buffers. You
must not use the compressor object after calling this method.
\end{methoddesc}
143
\begin{classdesc}{BZ2Decompressor}{}
Create a new decompressor object. This object may be used to decompress
data sequentially. If you want to decompress data in one shot, use the
\function{decompress()} function instead.
\end{classdesc}
149
\begin{methoddesc}[BZ2Decompressor]{decompress}{data}
Provide more data to the decompressor object. It will return chunks of
decompressed data whenever possible. If you try to decompress data after the
end of stream is found, \exception{EOFError} will be raised. If any data was
found after the end of stream, it will be ignored and saved in the
\member{unused\_data} attribute.
\end{methoddesc}
157
158
\subsection{One-shot (de)compression}

One-shot compression and decompression is provided through the
\function{compress()} and \function{decompress()} functions.
163
\begin{funcdesc}{compress}{data\optional{, compresslevel}}
Compress \var{data} in one shot. If you want to compress data sequentially,
use an instance of \class{BZ2Compressor} instead. The \var{compresslevel}
parameter, if given, must be a number between \code{1} and \code{9};
the default is \code{9}.
\end{funcdesc}
170
\begin{funcdesc}{decompress}{data}
Decompress \var{data} in one shot. If you want to decompress data
sequentially, use an instance of \class{BZ2Decompressor} instead.
\end{funcdesc}