# Copyright (C) 2001 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
3
4"""Basic message object for the email package object model.
5"""
6
7from __future__ import generators
8
9import re
10import base64
11import quopri
12from cStringIO import StringIO
13from types import ListType
14
15SEMISPACE = '; '
16
17# Intrapackage imports
18import Errors
19import Utils
20
21
22
class Message:
    """Basic message object for use inside the object tree.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  If the body of the message is a multipart, then
    the payload is a list of Messages, otherwise it is a string.

    These objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received:) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.

    Header field names are always matched case-insensitively, but the
    original spelling and the original order of the headers are preserved.
    """
    def __init__(self):
        # Ordered list of (name, value) 2-tuples; duplicates are allowed.
        self._headers = []
        # The Unix From_ envelope line, or None if there is none.
        self._unixfrom = None
        # None (empty), a scalar (non-multipart), or a list (multipart).
        self._payload = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None

    def __str__(self):
        """Return the entire formatted message as a string.

        This includes the headers, body, and `unixfrom' line.
        """
        return self.as_string(unixfrom=1)

    def as_string(self, unixfrom=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when true, means include the Unix From_ envelope
        header.  The text is produced by running a Generator over this
        message and collecting its output in a StringIO buffer.
        """
        # Imported here rather than at module level; presumably to avoid a
        # circular import between this module and Generator.
        from Generator import Generator
        fp = StringIO()
        g = Generator(fp)
        g(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return true (1) if the payload is a list of parts, else 0."""
        if isinstance(self._payload, list):
            return 1
        return 0

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the message's Unix From_ envelope line to `unixfrom'."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the Unix From_ envelope line (None if never set)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def add_payload(self, payload):
        """Add the given payload to the current payload.

        If the current payload is empty, then the current payload will be made
        a scalar, set to the given value.  If the current payload is already
        a list, the given payload is appended to it.  Otherwise the current
        scalar payload and the given payload are combined into a two element
        list, which turns the message into a multipart.

        MultipartConversionError is raised if a scalar payload would have to
        be converted to a list but the message has a Content-Type: header
        whose main type is something other than "multipart".
        """
        if self._payload is None:
            self._payload = payload
        elif isinstance(self._payload, list):
            self._payload.append(payload)
        elif self.get_main_type() not in (None, 'multipart'):
            raise Errors.MultipartConversionError(
                'Message main Content-Type: must be "multipart" or missing')
        else:
            self._payload = [self._payload, payload]

    # A useful synonym
    attach = add_payload

    def get_payload(self, i=None, decode=0):
        """Return the current payload exactly as is.

        Optional i returns that index into the payload.  TypeError is raised
        if i is given but the payload is not a list; an out of range index
        raises IndexError from the underlying list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding: header.
        When true and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, the payload is
        returned as-is (undecoded).  If the message is a multipart and the
        decode flag is true, then None is returned.
        """
        if i is None:
            payload = self._payload
        elif not isinstance(self._payload, list):
            raise TypeError(i)
        else:
            payload = self._payload[i]
        if decode:
            if self.is_multipart():
                return None
            cte = self.get('content-transfer-encoding', '').lower()
            if cte == 'quoted-printable':
                return Utils._qdecode(payload)
            elif cte == 'base64':
                return Utils._bdecode(payload)
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload

    def set_payload(self, payload):
        """Set the payload to the given value, replacing any old payload."""
        self._payload = payload

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Get the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, key):
        """Return true if a header named `key' exists (case-insensitive)."""
        return key.lower() in [k.lower() for k, v in self._headers]

    def has_key(self, name):
        """Return true if the message contains the header."""
        # Use a private sentinel so that the answer depends only on the
        # presence of the header, never on its value.  This keeps has_key()
        # in agreement with __contains__().
        missing = []
        return self.get(name, missing) is not missing

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.  The returned
        list is a copy, so changing it does not change the message.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.  When the header occurs more than once, the value of the
        first occurrence in the header list is returned.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such header exists, failobj is returned when it was given as
        something other than None.  With the default failobj of None an empty
        list is returned, so that existing callers can keep looping over the
        result without a check.
        """
        name = name.lower()
        values = [v for k, v in self._headers if k.lower() == name]
        if not values and failobj is not None:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.

        If _value is None only the parameters make up the header value.  The
        new header is appended; existing headers of the same name are kept.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')

        """
        parts = []
        for k, v in _params.items():
            if v is None:
                parts.append(k.replace('_', '-'))
            else:
                parts.append('%s="%s"' % (k.replace('_', '-'), v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def _split_params(self, value):
        """Split a header value on the semicolons outside of quoted strings.

        Returns the list of segments with surrounding whitespace stripped.
        The first segment is the main value, e.g. `text/plain', the others
        are the raw parameters.  Whitespace after a semicolon is optional,
        and a semicolon inside a double quoted string (where a backslash
        escapes the next character) does not split.
        """
        segments = []
        current = []
        in_quotes = 0
        escaped = 0
        for ch in value:
            if escaped:
                # This character was escaped by a backslash, take it as is.
                escaped = 0
            elif ch == '\\' and in_quotes:
                escaped = 1
            elif ch == '"':
                in_quotes = not in_quotes
            elif ch == ';' and not in_quotes:
                segments.append(''.join(current).strip())
                current = []
                continue
            current.append(ch)
        segments.append(''.join(current).strip())
        return segments

    def get_type(self, failobj=None):
        """Returns the message's content type.

        The returned string is coerced to lowercase and returned as a single
        string of the form `maintype/subtype'.  If there was no Content-Type:
        header in the message, failobj is returned (defaults to None).
        """
        missing = []
        value = self.get('content-type', missing)
        if value is missing:
            return failobj
        return self._split_params(value)[0].lower()

    def get_main_type(self, failobj=None):
        """Return the message's main content type if present.

        This is the part of the content type before the first slash.  failobj
        is returned if there is no Content-Type: header.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        return ctype.split('/')[0]

    def get_subtype(self, failobj=None):
        """Return the message's content subtype if present.

        This is the part of the content type after the first slash.  failobj
        is returned if there is no Content-Type: header, or if the content
        type contains no slash.
        """
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        parts = ctype.split('/')
        if len(parts) > 1:
            return parts[1]
        return failobj

    def get_params(self, failobj=None, header='content-type'):
        """Return the message's Content-Type: parameters, as a list.

        Each item is the raw text of one parameter, e.g. `charset="us-ascii"',
        with surrounding whitespace removed.  Empty parameters, such as the
        one after a trailing semicolon, are left out.

        Optional failobj is the object to return if there is no Content-Type:
        header.  Optional header is the header to search instead of
        Content-Type:
        """
        missing = []
        value = self.get(header, missing)
        if value is missing:
            return failobj
        return [p for p in self._split_params(value)[1:] if p]

    def get_param(self, param, failobj=None, header='content-type'):
        """Return the parameter value if found in the Content-Type: header.

        The parameter name is matched case-insensitively and the value is
        returned unquoted.  A parameter without a value yields the empty
        string.

        Optional failobj is the object to return if there is no Content-Type:
        header, or if the header has no such parameter.  Optional header is
        the header to search instead of Content-Type:
        """
        param = param.lower()
        missing = []
        params = self.get_params(missing, header=header)
        if params is missing:
            return failobj
        for p in params:
            try:
                name, val = p.split('=', 1)
            except ValueError:
                # Must have been a bare attribute
                name = p
                val = ''
            # Whitespace around the equal sign is not part of name or value
            if name.strip().lower() == param:
                return Utils.unquote(val.strip())
        return failobj

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition: header's
        `filename' parameter, and it is unquoted.  failobj is returned if the
        header or the parameter is missing.
        """
        missing = []
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            return failobj
        # get_param() has already unquoted the value; unquoting a second time
        # would eat quotes or angle brackets that belong to the name itself.
        return filename.strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type: header's `boundary'
        parameter, and it is unquoted.  failobj is returned if the header or
        the parameter is missing.
        """
        missing = []
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # get_param() has already unquoted the value, see get_filename().
        return boundary.strip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type: to 'boundary'.

        This is subtly different than deleting the Content-Type: header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type: header in the original message.

        If the header has no boundary parameter yet, one is added after the
        existing parameters.

        HeaderParseError is raised if the message has no Content-Type: header.
        """
        # A header without any parameters gives an empty list, so a sentinel
        # is needed to tell that case apart from a missing header.
        missing = []
        params = self.get_params(missing)
        if params is missing:
            # There was no Content-Type: header, and we don't know what type
            # to set it to, so raise an exception.
            raise Errors.HeaderParseError('No Content-Type: header found')
        quoted = 'boundary="%s"' % boundary
        newparams = []
        foundp = 0
        for p in params:
            if p.split('=', 1)[0].strip().lower() == 'boundary':
                newparams.append(quoted)
                foundp = 1
            else:
                newparams.append(p)
        if not foundp:
            # The original Content-Type: header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(quoted)
        # Replace the existing Content-Type: header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                ctype = v.split(';', 1)[0].strip()
                # Build a fresh list for each header so that a duplicate
                # Content-Type: header does not pile up the earlier values.
                newheaders.append((h, SEMISPACE.join([ctype] + newparams)))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def walk(self):
        """Walk over the message tree, yielding each non-multipart subpart.

        The walk is performed in depth-first order.  Only the leaves of the
        tree are yielded; a multipart container is descended into, but is not
        yielded itself.  A non-multipart message yields just itself.  This
        method is a generator.
        """
        if self.is_multipart():
            for subpart in self.get_payload():
                for subsubpart in subpart.walk():
                    yield subsubpart
        else:
            yield self

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type: headers'
        charset parameter for each part that walk() yields, i.e. for each
        non-multipart part of this message.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type: header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        Content-Type: header, or the charset is not defined.

        A non-multipart message will return a list of length 1.
        """
        return [part.get_param('charset', failobj) for part in self.walk()]