% Documentation by ESR
\section{Standard Module \module{multifile}}
\declaremodule{standard}{multifile}

\modulesynopsis{Support for reading files which contain distinct
parts, such as some MIME data.}


The \class{MultiFile} object enables you to treat sections of a text
file as file-like input objects, with \code{''} being returned by
\method{readline()} when a given delimiter pattern is encountered. The
defaults of this class are designed to make it useful for parsing
MIME multipart messages, but by subclassing it and overriding methods
it can be easily adapted for more general use.

\begin{classdesc}{MultiFile}{fp\optional{, seekable}}
Create a multi-file. You must instantiate this class with an input
object argument for the \class{MultiFile} instance to get lines from,
such as a file object returned by \function{open()}.

\class{MultiFile} only ever looks at the input object's
\method{readline()}, \method{seek()} and \method{tell()} methods, and
the latter two are only needed if you want random access to the
individual MIME parts. To use \class{MultiFile} on a non-seekable
stream object, set the optional \var{seekable} argument to false; this
will prevent using the input object's \method{seek()} and
\method{tell()} methods.
\end{classdesc}

It will be useful to know that in \class{MultiFile}'s view of the world, text
is composed of three kinds of lines: data, section-dividers, and
end-markers. \class{MultiFile} is designed to support parsing of
messages that may have multiple nested message parts, each with its
own pattern for section-divider and end-marker lines.


\subsection{MultiFile Objects \label{MultiFile-objects}}

A \class{MultiFile} instance has the following methods:

\begin{methoddesc}{push}{str}
Push a boundary string. When an appropriately decorated version of
this boundary is found as an input line, it will be interpreted as a
section-divider or end-marker. All subsequent
reads will return the empty string to indicate end-of-file, until a
call to \method{pop()} removes the boundary or a \method{next()} call
re-enables it.

It is possible to push more than one boundary. Encountering the
most-recently-pushed boundary will return EOF; encountering any other
boundary will raise an error.
\end{methoddesc}

\begin{methoddesc}{readline}{}
Read a line. If the line is data (not a section-divider or end-marker
or real EOF) return it. If the line matches the most-recently-stacked
boundary, return \code{''} and set \code{self.last} to 1 or 0 according
to whether the match is or is not an end-marker. If the line matches any
other stacked boundary, raise an error. On encountering end-of-file on the
underlying stream object, the method raises \exception{Error} unless
all boundaries have been popped.
\end{methoddesc}

\begin{methoddesc}{readlines}{}
Return all lines remaining in this part as a list of strings.
\end{methoddesc}

\begin{methoddesc}{read}{}
Read all lines, up to the next section. Return them as a single
(multiline) string. Note that this doesn't take a size argument!
\end{methoddesc}

\begin{methoddesc}{next}{}
Skip lines to the next section (that is, read lines until a
section-divider or end-marker has been consumed). Return true if
there is such a section, false if an end-marker is seen. Re-enable
the most-recently-pushed boundary.
\end{methoddesc}

\begin{methoddesc}{pop}{}
Pop a section boundary. This boundary will no longer be interpreted
as EOF.
\end{methoddesc}

\begin{methoddesc}{seek}{pos\optional{, whence}}
Seek. Seek indices are relative to the start of the current section.
The \var{pos} and \var{whence} arguments are interpreted as for a file
seek.
\end{methoddesc}

\begin{methoddesc}{tell}{}
Return the file position relative to the start of the current section.
\end{methoddesc}

\begin{methoddesc}{is_data}{str}
Return true if \var{str} is data and false if it might be a section
boundary. As written, it tests for a prefix other than \code{'--'} at
start of line (which all MIME boundaries have) but it is declared so
it can be overridden in derived classes.

Note that this test is intended as a fast guard for the real
boundary tests; if it always returns false it will merely slow
processing, not cause it to fail.
\end{methoddesc}

\begin{methoddesc}{section_divider}{str}
Turn a boundary into a section-divider line. By default, this
method prepends \code{'--'} (which MIME section boundaries have) but
it is declared so it can be overridden in derived classes. This
method need not append LF or CR-LF, as comparison with the result
ignores trailing whitespace.
\end{methoddesc}

\begin{methoddesc}{end_marker}{str}
Turn a boundary string into an end-marker line. By default, this
method prepends \code{'--'} and appends \code{'--'} (like a
MIME-multipart end-of-message marker) but it is declared so it can be
overridden in derived classes. This method need not append LF or
CR-LF, as comparison with the result ignores trailing whitespace.
\end{methoddesc}

Finally, \class{MultiFile} instances have two public instance variables:

\begin{memberdesc}{level}
Nesting depth of the current part.
\end{memberdesc}

\begin{memberdesc}{last}
True if the last end-of-file was for an end-of-message marker.
\end{memberdesc}


\subsection{\class{MultiFile} Example \label{multifile-example}}

% This is almost unreadable; should be re-written when someone gets time.

\begin{verbatim}
fp = MultiFile(sys.stdin, 0)
fp.push(outer_boundary)
message1 = fp.readlines()
# We should now be either at real EOF or stopped on a message
# boundary. Re-enable the outer boundary.
fp.next()
# Read another message with the same delimiter
message2 = fp.readlines()
# Re-enable that delimiter again
fp.next()
# Now look for a message subpart with a different boundary
fp.push(inner_boundary)
sub_header = fp.readlines()
# If no exception has been thrown, we're looking at the start of
# the message subpart. Reset and grab the subpart
fp.next()
sub_body = fp.readlines()
# Got it. Now pop the inner boundary to re-enable the outer one.
fp.pop()
# Read to next outer boundary
message3 = fp.readlines()
\end{verbatim}