blob: db10eb513a9867a09c08ea07569c3a63138db47d [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001\section{\module{bz2} ---
2 Compression compatible with \program{bzip2}}
3
4\declaremodule{builtin}{bz2}
5\modulesynopsis{Interface to compression and decompression
6 routines compatible with \program{bzip2}.}
7\moduleauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
8\sectionauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
9
10\versionadded{2.3}
11
12This module provides a comprehensive interface for the bz2 compression library.
13It implements a complete file interface, one-shot (de)compression functions,
14and types for sequential (de)compression.
15
16Here is a summary of the features offered by the \module{bz2} module:
17
18\begin{itemize}
19\item \class{BZ2File} class implements a complete file interface, including
20 \method{readline()}, \method{readlines()}, \method{xreadlines()},
21 \method{writelines()}, \method{seek()}, etc;
22\item \class{BZ2File} class implements emulated \method{seek()} support;
23\item \class{BZ2File} class implements universal newline support;
24\item \class{BZ2File} class offers an optimized line iteration using
25 the readahead algorithm borrowed from file objects;
26\item \class{BZ2File} class inherits from the builtin \class{file} type
Fred Drake18c7d982002-11-05 17:54:02 +000027 (\code{issubclass(BZ2File, file)} is \code{True});
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000028\item Sequential (de)compression supported by \class{BZ2Compressor} and
29 \class{BZ2Decompressor} classes;
30\item One-shot (de)compression supported by \function{compress()} and
31 \function{decompress()} functions;
32\item Thread safety is ensured through an individual locking mechanism;
33\item Complete inline documentation;
34\end{itemize}
35
36
37\subsection{(De)compression of files}
38
39Handling of compressed files is offered by the \class{BZ2File} class.
40
Fred Drake18c7d982002-11-05 17:54:02 +000041\begin{classdesc}{BZ2File}{filename\optional{, mode\optional{,
42 buffering\optional{, compresslevel}}}}
43Open a bz2 file. Mode can be either \code{'r'} or \code{'w'}, for reading
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000044(default) or writing. When opened for writing, the file will be created if
Fred Drake18c7d982002-11-05 17:54:02 +000045it doesn't exist, and truncated otherwise. If \var{buffering} is given,
46\code{0} means unbuffered, and larger numbers specify the buffer size;
47the default is \code{0}. If
48\var{compresslevel} is given, it must be a number between \code{1} and
49\code{9}; the default is \code{9}.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000050Add a \code{'U'} to mode to open the file for input with universal newline
51support. Any line ending in the input file will be seen as a
Fred Drake18c7d982002-11-05 17:54:02 +000052\character{\textbackslash n}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000053in Python. Also, a file so opened gains the attribute \member{newlines};
54the value for this attribute is one of \code{None} (no newline read yet),
55\code{'\textbackslash r'}, \code{'\textbackslash n'},
56\code{'\textbackslash r\textbackslash n'} or a tuple containing all the
57newline types seen. Universal newlines are available only when reading.
58\end{classdesc}
59
60\begin{methoddesc}[BZ2File]{close}{}
61Close the file. Sets data attribute \member{closed} to true. A closed file
62cannot be used for further I/O operations. \method{close()} may be called
63more than once without error.
64\end{methoddesc}
65
66\begin{methoddesc}[BZ2File]{read}{\optional{size}}
67Read at most \var{size} uncompressed bytes, returned as a string. If the
68\var{size} argument is negative or omitted, read until EOF is reached.
69\end{methoddesc}
70
71\begin{methoddesc}[BZ2File]{readline}{\optional{size}}
72Return the next line from the file, as a string, retaining newline.
73A non-negative \var{size} argument limits the maximum number of bytes to
74return (an incomplete line may be returned then). Return an empty
75string at EOF.
76\end{methoddesc}
77
78\begin{methoddesc}[BZ2File]{readlines}{\optional{size}}
79Return a list of lines read. The optional \var{size} argument, if given,
80is an approximate bound on the total number of bytes in the lines returned.
81\end{methoddesc}
82
83\begin{methoddesc}[BZ2File]{xreadlines}{}
84For backward compatibility. \class{BZ2File} objects now include the
85performance optimizations previously implemented in the \module{xreadlines}
86module.
87\end{methoddesc}
88
89\begin{methoddesc}[BZ2File]{\_\_iter\_\_}{}
90Iterate through the lines of the file. Iteration optimization is implemented
91using the same readahead algorithm available in \class{file} objects.
92\end{methoddesc}
93
Fred Drake18c7d982002-11-05 17:54:02 +000094\begin{methoddesc}[BZ2File]{seek}{offset\optional{, whence}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000095Move to new file position. Argument \var{offset} is a byte count. Optional
96argument \var{whence} defaults to \code{0} (offset from start of file,
97offset should be \code{>= 0}); other values are \code{1} (move relative to
98current position, positive or negative), and \code{2} (move relative to end
99of file, usually negative, although many platforms allow seeking beyond
100the end of a file).
101
102Note that seeking of bz2 files is emulated, and depending on the parameters
103the operation may be extremely slow.
104\end{methoddesc}
105
106\begin{methoddesc}[BZ2File]{tell}{}
107Return the current file position, an integer (may be a long integer).
108\end{methoddesc}
109
110\begin{methoddesc}[BZ2File]{write}{data}
111Write string \var{data} to file. Note that due to buffering, \method{close()}
112may be needed before the file on disk reflects the data written.
113\end{methoddesc}
114
115\begin{methoddesc}[BZ2File]{writelines}{sequence\_of\_strings}
116Write the sequence of strings to the file. Note that newlines are not added.
117The sequence can be any iterable object producing strings. This is equivalent
118to calling \method{write()} for each string.
119\end{methoddesc}
120
121
122\subsection{Sequential (de)compression}
123
124Sequential compression and decompression is done using the classes
125\class{BZ2Compressor} and \class{BZ2Decompressor}.
126
Fred Drake18c7d982002-11-05 17:54:02 +0000127\begin{classdesc}{BZ2Compressor}{\optional{compresslevel}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000128Create a new compressor object. This object may be used to compress
129data sequentially. If you want to compress data in one shot, use the
130\function{compress()} function instead. The \var{compresslevel} parameter,
Fred Drake18c7d982002-11-05 17:54:02 +0000131if given, must be a number between \code{1} and \code{9}; the default
132is \code{9}.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000133\end{classdesc}
134
135\begin{methoddesc}[BZ2Compressor]{compress}{data}
136Provide more data to the compressor object. It will return chunks of compressed
137data whenever possible. When you've finished providing data to compress, call
138the \method{flush()} method to finish the compression process, and return what
139is left in internal buffers.
140\end{methoddesc}
141
142\begin{methoddesc}[BZ2Compressor]{flush}{}
143Finish the compression process and return what is left in internal buffers. You
144must not use the compressor object after calling this method.
145\end{methoddesc}
146
147\begin{classdesc}{BZ2Decompressor}{}
148Create a new decompressor object. This object may be used to decompress
149data sequentially. If you want to decompress data in one shot, use the
150\function{decompress()} function instead.
151\end{classdesc}
152
153\begin{methoddesc}[BZ2Decompressor]{decompress}{data}
154Provide more data to the decompressor object. It will return chunks of
155decompressed data whenever possible. If you try to decompress data after the
156end of stream is found, \exception{EOFError} will be raised. If any data was
157found after the end of stream, it will be ignored and saved in the
158\member{unused\_data} attribute.
159\end{methoddesc}
160
161
162\subsection{One-shot (de)compression}
163
164One-shot compression and decompression is provided through the
165\function{compress()} and \function{decompress()} functions.
166
Fred Drake18c7d982002-11-05 17:54:02 +0000167\begin{funcdesc}{compress}{data\optional{, compresslevel}}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000168Compress \var{data} in one shot. If you want to compress data sequentially,
169use an instance of \class{BZ2Compressor} instead. The \var{compresslevel}
Fred Drake18c7d982002-11-05 17:54:02 +0000170parameter, if given, must be a number between \code{1} and \code{9};
171the default is \code{9}.
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000172\end{funcdesc}
173
Fred Drake18c7d982002-11-05 17:54:02 +0000174\begin{funcdesc}{decompress}{data}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000175Decompress \var{data} in one shot. If you want to decompress data
176sequentially, use an instance of \class{BZ2Decompressor} instead.
177\end{funcdesc}