| """Generic MIME parser. | 
 |  | 
 | Classes: | 
 |  | 
 |         MimeParser - Generic MIME parser. | 
 |  | 
 | Exceptions: | 
 |  | 
 |         MimeError - Exception raised by MimeParser class. | 
 |  | 
 | XXX To do: | 
 |  | 
 | - Content-transfer-encoding issues | 
 | - Use Content-length header in rawbody()? | 
 | - Cache parts instead of reparsing each time | 
 | - The message strings in exceptions could use some work | 
 |  | 
 | """ | 
 |  | 
 | from types import *                     # Python types, not MIME types :-) | 
 | import string | 
 | import regex | 
 | import SubFile | 
 | import mimetools | 
 |  | 
 |  | 
# Exception raised by the MimeParser class.  This is a string exception:
# the methods raise it as "raise MimeError, message".
MimeError = "MimeParser.MimeError"
 |  | 
 |  | 
 | class MimeParser: | 
 |  | 
 |     """Generic MIME parser. | 
 |  | 
 |     This requires a seekable file. | 
 |  | 
 |     """ | 
 |  | 
 |     def __init__(self, fp): | 
 |         """Constructor: store the file pointer and parse the headers.""" | 
 |         self._fp = fp | 
 |         self._start = fp.tell() | 
 |         self._headers = h = mimetools.Message(fp) | 
 |         self._bodystart = fp.tell() | 
 |         self._multipart = h.getmaintype() == 'multipart' | 
 |  | 
 |     def multipart(self): | 
 |         """Return whether this is a multipart message.""" | 
 |         return self._multipart | 
 |  | 
 |     def headers(self): | 
 |         """Return the headers of the MIME message, as a Message object.""" | 
 |         return self._headers | 
 |  | 
 |     def rawbody(self): | 
 |         """Return the raw body of the MIME message, as a file-like object. | 
 |  | 
 |         This is a fairly low-level interface -- for a multipart | 
 |         message, you'd have to parse the body yourself, and it doesn't | 
 |         translate the Content-transfer-encoding. | 
 |          | 
 |         """ | 
 |         # XXX Use Content-length to set end if it exists? | 
 |         return SubFile.SubFile(self._fp, self._bodystart) | 
 |  | 
 |     def body(self): | 
 |         """Return the body of a 1-part MIME message, as a file-like object. | 
 |  | 
 |         This should interpret the Content-transfer-encoding, if any | 
 |         (XXX currently it doesn't). | 
 |          | 
 |         """ | 
 |         if self._multipart: | 
 |             raise MimeError, "body() only works for 1-part messages" | 
 |         return self.rawbody() | 
 |  | 
 |     _re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)', | 
 |                                        regex.casefold) | 
 |  | 
 |     def rawparts(self): | 
 |         """Return the raw body parts of a multipart MIME message. | 
 |  | 
 |         This returns a list of SubFile() objects corresponding to the | 
 |         parts.  Note that the phantom part before the first separator | 
 |         is returned too, as list item 0.  If the final part is not | 
 |         followed by a terminator, it is ignored, and this error is not | 
 |         reported.  (XXX: the error should be raised). | 
 |  | 
 |         """ | 
 |         if not self._multipart: | 
 |             raise MimeError, "[raw]parts() only works for multipart messages" | 
 |         h = self._headers | 
 |         separator = h.getparam('boundary') | 
 |         if not separator: | 
 |             raise MimeError, "multipart boundary not specified" | 
 |         separator = "--" + separator | 
 |         terminator = separator + "--" | 
 |         ns = len(separator) | 
 |         list = [] | 
 |         f = self._fp | 
 |         start = f.tell() | 
 |         clength = -1 | 
 |         bodystart = -1 | 
 |         inheaders = 0 | 
 |         while 1: | 
 |             end = f.tell() | 
 |             line = f.readline() | 
 |             if not line: | 
 |                 break | 
 |             if line[:2] != "--" or line[:ns] != separator: | 
 |                 if inheaders: | 
 |                     re = self._re_content_length | 
 |                     if re.match(line) > 0: | 
 |                         try: | 
 |                             clength = string.atoi(re.group(1)) | 
 |                         except string.atoi_error: | 
 |                             pass | 
 |                     if not string.strip(line): | 
 |                         inheaders = 0 | 
 |                         bodystart = f.tell() | 
 |                         if clength > 0: | 
 |                             # Skip binary data | 
 |                             f.read(clength) | 
 |                 continue | 
 |             line = string.strip(line) | 
 |             if line == terminator or line == separator: | 
 |                 if clength >= 0: | 
 |                     # The Content-length header determines the subfile size | 
 |                     end = bodystart + clength | 
 |                 else: | 
 |                     # The final newline is not part of the content | 
 |                     end = end-1 | 
 |                 list.append(SubFile.SubFile(f, start, end)) | 
 |                 start = f.tell() | 
 |                 clength = -1 | 
 |                 inheaders = 1 | 
 |                 if line == terminator: | 
 |                     break | 
 |         return list | 
 |  | 
 |     def parts(self): | 
 |         """Return the parsed body parts of a multipart MIME message. | 
 |  | 
 |         This returns a list of MimeParser() instances corresponding to | 
 |         the parts.  The phantom part before the first separator is not | 
 |         included. | 
 |  | 
 |         """ | 
 |         return map(MimeParser, self.rawparts()[1:]) | 
 |  | 
 |     def getsubpartbyposition(self, indices): | 
 |         part = self | 
 |         for i in indices: | 
 |             part = part.parts()[i] | 
 |         return part | 
 |  | 
 |     def getsubpartbyid(self, id): | 
 |         h = self._headers | 
 |         cid = h.getheader('content-id') | 
 |         if cid and cid == id: | 
 |             return self | 
 |         if self._multipart: | 
 |             for part in self.parts(): | 
 |                 parser = MimeParser(part) | 
 |                 hit = parser.getsubpartbyid(id) | 
 |                 if hit: | 
 |                     return hit | 
 |         return None | 
 |  | 
 |     def index(self): | 
 |         """Return an index of the MIME file. | 
 |  | 
 |         This parses the entire file and returns index information | 
 |         about it, in the form of a tuple | 
 |  | 
 |             (ctype, headers, body) | 
 |  | 
 |         where 'ctype' is the content type string of the message | 
 |         (e.g. `text/plain' or `multipart/mixed') and 'headers' is a | 
 |         Message instance containing the message headers (which should | 
 |         be treated as read-only). | 
 |  | 
 |         The 'body' item depends on the content type: | 
 |  | 
 |         - If it is an atomic message (anything except for content type | 
 |           multipart/*), it is the file-like object returned by | 
 |           self.body(). | 
 |  | 
 |         - For a content type of multipart/*, it is the list of | 
 |           MimeParser() objects returned by self.parts(). | 
 |  | 
 |         """ | 
 |         if self._multipart: | 
 |             body = self.parts() | 
 |         else: | 
 |             body = self.body() | 
 |         return self._headers.gettype(), self._headers, body | 
 |  | 
 |  | 
 | def _show(parser, level=0): | 
 |     """Helper for _test().""" | 
 |     ctype, headers, body = parser.index() | 
 |     print ctype, | 
 |     if type(body) == ListType: | 
 |         nparts = len(body) | 
 |         print "(%d part%s):" % (nparts, nparts != 1 and "s" or "") | 
 |         n = 0 | 
 |         for part in body: | 
 |             n = n+1 | 
 |             print "%*d." % (4*level+2, n), | 
 |             _show(part, level+1) | 
 |     else: | 
 |         bodylines = body.readlines() | 
 |         print "(%d header lines, %d body lines)" % ( | 
 |             len(headers.headers), len(bodylines)) | 
 |         for line in headers.headers + ['\n'] + bodylines: | 
 |             if line[-1:] == '\n': line = line[:-1] | 
 |             print "    "*level + line | 
 |  | 
 | def _test(args = None): | 
 |     """Test program invoked when run as a script. | 
 |  | 
 |     When a filename argument is specified, it reads from that file. | 
 |     When no arguments are present, it defaults to 'testkp.txt' if it | 
 |     exists, else it defaults to stdin. | 
 |  | 
 |     """ | 
 |     if not args: | 
 |         import sys | 
 |         args = sys.argv[1:] | 
 |     if args: | 
 |         fn = args[0] | 
 |     else: | 
 |         import os | 
 |         fn = 'testkp.txt' | 
 |         if not os.path.exists(fn): | 
 |             fn = '-' | 
 |     if fn == '-': | 
 |         fp = sys.stdin | 
 |     else: | 
 |         fp = open(fn) | 
 |     mp = MimeParser(fp) | 
 |     _show(mp) | 
 |  | 
# Run the test program when this file is executed as a script.
if __name__ == '__main__':
    import sys
    _test()