blob: 36bc0d2e7d822769f9b7668359648766c820c3c8 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001\section{\module{bz2} ---
2 Compression compatible with \program{bzip2}}
3
4\declaremodule{builtin}{bz2}
5\modulesynopsis{Interface to compression and decompression
6 routines compatible with \program{bzip2}.}
7\moduleauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
8\sectionauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
9
10\versionadded{2.3}
11
12This module provides a comprehensive interface for the bz2 compression library.
13It implements a complete file interface, one-shot (de)compression functions,
14and types for sequential (de)compression.
15
16Here is a summary of the features offered by the \module{bz2} module:
17
18\begin{itemize}
19\item \class{BZ2File} class implements a complete file interface, including
Fred Drake3a2cda82002-11-15 16:38:06 +000020 \method{readline()}, \method{readlines()},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000021 \method{writelines()}, \method{seek()}, etc;
22\item \class{BZ2File} class implements emulated \method{seek()} support;
23\item \class{BZ2File} class implements universal newline support;
24\item \class{BZ2File} class offers an optimized line iteration using
25 the readahead algorithm borrowed from file objects;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000026\item Sequential (de)compression supported by \class{BZ2Compressor} and
27 \class{BZ2Decompressor} classes;
28\item One-shot (de)compression supported by \function{compress()} and
29 \function{decompress()} functions;
30\item Thread safety is provided by an individual locking mechanism;
31\item Complete inline documentation.
32\end{itemize}
33
34
35\subsection{(De)compression of files}
36
37Handling of compressed files is offered by the \class{BZ2File} class.
38
Fred Drake18c7d982002-11-05 17:54:02 +000039\begin{classdesc}{BZ2File}{filename\optional{, mode\optional{,
40 buffering\optional{, compresslevel}}}}
41Open a bz2 file. The \var{mode} argument can be either \code{'r'} or \code{'w'}, for reading
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000042(default) or writing. When opened for writing, the file will be created if
Fred Drake18c7d982002-11-05 17:54:02 +000043it doesn't exist, and truncated otherwise. If \var{buffering} is given,
44\code{0} means unbuffered, and larger numbers specify the buffer size;
45the default is \code{0}. If
Gustavo Niemeyer057ab142002-11-25 18:51:43 +000046\var{compresslevel} is given, it must be a number between \code{1} and
Fred Drake18c7d982002-11-05 17:54:02 +000047\code{9}; the default is \code{9}.
Fred Drake3a2cda82002-11-15 16:38:06 +000048Add a \character{U} to \var{mode} to open the file for input with universal newline
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000049support. Any line ending in the input file will be seen as a
Fred Drake3a2cda82002-11-15 16:38:06 +000050\character{\e n} in Python. Also, a file so opened gains the
51attribute \member{newlines}; the value for this attribute is one of
52\code{None} (no newline read yet), \code{'\e r'}, \code{'\e n'},
53\code{'\e r\e n'} or a tuple containing all the newline types
54seen. Universal newlines are available only when reading.
55Instances support iteration in the same way as normal \class{file}
56instances.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000057\end{classdesc}
58
59\begin{methoddesc}[BZ2File]{close}{}
60Close the file. Sets data attribute \member{closed} to true. A closed file
61cannot be used for further I/O operations. \method{close()} may be called
62more than once without error.
63\end{methoddesc}
64
65\begin{methoddesc}[BZ2File]{read}{\optional{size}}
66Read at most \var{size} uncompressed bytes, returned as a string. If the
67\var{size} argument is negative or omitted, read until EOF is reached.
68\end{methoddesc}
69
70\begin{methoddesc}[BZ2File]{readline}{\optional{size}}
71Return the next line from the file, as a string, retaining the newline.
72A non-negative \var{size} argument limits the maximum number of bytes to
73return (an incomplete line may be returned then). Return an empty
74string at EOF.
75\end{methoddesc}
76
77\begin{methoddesc}[BZ2File]{readlines}{\optional{size}}
78Return a list of lines read. The optional \var{size} argument, if given,
79is an approximate bound on the total number of bytes in the lines returned.
80\end{methoddesc}
81
Fred Drake18c7d982002-11-05 17:54:02 +000082\begin{methoddesc}[BZ2File]{seek}{offset\optional{, whence}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000083Move to new file position. Argument \var{offset} is a byte count. Optional
Thomas Wouters902d6eb2007-01-09 23:18:33 +000084argument \var{whence} defaults to \code{os.SEEK_SET} or \code{0} (offset from start of file;
85offset should be \code{>= 0}); other values are \code{os.SEEK_CUR} or \code{1} (move relative to
86current position; offset can be positive or negative), and \code{os.SEEK_END} or \code{2} (move relative to end
87of file; offset is usually negative, although many platforms allow seeking beyond
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000088the end of a file).
89
90Note that seeking of bz2 files is emulated, and depending on the parameters
91the operation may be extremely slow.
92\end{methoddesc}
93
94\begin{methoddesc}[BZ2File]{tell}{}
95Return the current file position, an integer (may be a long integer).
96\end{methoddesc}
97
98\begin{methoddesc}[BZ2File]{write}{data}
99Write string \var{data} to file. Note that due to buffering, \method{close()}
100may be needed before the file on disk reflects the data written.
101\end{methoddesc}
102
103\begin{methoddesc}[BZ2File]{writelines}{sequence_of_strings}
104Write the sequence of strings to the file. Note that newlines are not added.
105The sequence can be any iterable object producing strings. This is equivalent
106to calling \method{write()} for each string.
107\end{methoddesc}
108
109
110\subsection{Sequential (de)compression}
111
112Sequential compression and decompression are done using the classes
113\class{BZ2Compressor} and \class{BZ2Decompressor}.
114
Fred Drake18c7d982002-11-05 17:54:02 +0000115\begin{classdesc}{BZ2Compressor}{\optional{compresslevel}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000116Create a new compressor object. This object may be used to compress
117data sequentially. If you want to compress data in one shot, use the
118\function{compress()} function instead. The \var{compresslevel} parameter,
Fred Drake18c7d982002-11-05 17:54:02 +0000119if given, must be a number between \code{1} and \code{9}; the default
120is \code{9}.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000121\end{classdesc}
122
123\begin{methoddesc}[BZ2Compressor]{compress}{data}
124Provide more data to the compressor object. It will return chunks of compressed
125data whenever possible. When you've finished providing data to compress, call
126the \method{flush()} method to finish the compression process, and return what
127is left in internal buffers.
128\end{methoddesc}
129
130\begin{methoddesc}[BZ2Compressor]{flush}{}
131Finish the compression process and return what is left in internal buffers. You
132must not use the compressor object after calling this method.
133\end{methoddesc}
134
135\begin{classdesc}{BZ2Decompressor}{}
136Create a new decompressor object. This object may be used to decompress
137data sequentially. If you want to decompress data in one shot, use the
138\function{decompress()} function instead.
139\end{classdesc}
140
141\begin{methoddesc}[BZ2Decompressor]{decompress}{data}
142Provide more data to the decompressor object. It will return chunks of
143decompressed data whenever possible. If you try to decompress data after the
144end of stream is found, \exception{EOFError} will be raised. If any data was
145found after the end of stream, it will be ignored and saved in the
146\member{unused\_data} attribute.
147\end{methoddesc}
148
149
150\subsection{One-shot (de)compression}
151
Neal Norwitz110aa502002-11-05 23:55:27 +0000152One-shot compression and decompression are provided through the
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000153\function{compress()} and \function{decompress()} functions.
154
Fred Drake18c7d982002-11-05 17:54:02 +0000155\begin{funcdesc}{compress}{data\optional{, compresslevel}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000156Compress \var{data} in one shot. If you want to compress data sequentially,
157use an instance of \class{BZ2Compressor} instead. The \var{compresslevel}
Fred Drake18c7d982002-11-05 17:54:02 +0000158parameter, if given, must be a number between \code{1} and \code{9};
159the default is \code{9}.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160\end{funcdesc}
161
Fred Drake18c7d982002-11-05 17:54:02 +0000162\begin{funcdesc}{decompress}{data}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000163Decompress \var{data} in one shot. If you want to decompress data
164sequentially, use an instance of \class{BZ2Decompressor} instead.
165\end{funcdesc}