Blame - Lib/email/Generator.py - platform/external/python/cpython2

2002-04-10 21:01:31 +0000

[diff] [blame]

1

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

2

# Author: barry@zope.com (Barry Warsaw)

3

4

"""Classes to generate plain text from a message object tree.

"""

import time

import re

import random

from types import ListType, StringType

12

from cStringIO import StringIO

13

Barry Warsaw

2002-06-28 23:41:42 +0000

[diff] [blame]

14

from email.Header import Header

15

Barry Warsaw

d1eeecb

2001-10-17 20:51:42 +0000

[diff] [blame]

16

# Shared string constants.  They are mostly used as joiners, e.g.
# NL.join(parts) or UNDERSCORE.join((maintype, subtype)).
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
# A hard tab counts as eight columns when measuring header line lengths.
SPACE8 = ' ' * 8

# Matches "From " at the start of any line (MULTILINE makes ^ match after
# every newline, not just at the start of the string).  Used to escape
# From_ lines in message bodies.
fcre = re.compile(r'^From ', re.MULTILINE)

26

27

Barry Warsaw

e968ead

2001-10-04 17:05:11 +0000

[diff] [blame]

28

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

29

class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when true, escapes From_ lines
        in the body of the message by putting a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header is folded onto
        tab-indented continuation lines (see _split_header()).  Set to zero
        to disable wrapping headers.  Default is 78, as recommended (but not
        required) by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        # NOTE: nothing in this class ever clears __first, so the
        # MIME-Version: filter in _write_headers() is currently inert.
        self.__first = 1
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object.  Having a write() method also
        # lets a generator be the target of `print >> generator, ...'.
        self._fp.write(s)

    def flatten(self, msg, unixfrom=0):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is 0 to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # No envelope line on the message; craft one stamped with
                # the current time.
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options.

        fp is the output file object for the new generator.  The new
        instance is created via self.__class__, so subclasses must accept
        the same three positional constructor arguments.
        """
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers, then body) to the output file object."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        ctype = msg.get_content_type()
        # NOTE: the unpacking below relies on get_content_type() returning
        # exactly one `/'.
        main, sub = ctype.split('/')
        # Dashes are not legal in method names, so they map to underscores
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by the blank separator line."""
        for h, v in msg.items():
            # We only write the MIME-Version: header for the outermost
            # container message.  Unfortunately, we can't use same technique
            # as for the Unix-From above because we don't know when
            # MIME-Version: will occur.
            if h.lower() == 'mime-version' and not self.__first:
                continue
            # RFC 2822 says that lines SHOULD be no more than maxheaderlen
            # characters wide, so we're well within our rights to split long
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
                text = self._split_header(h, text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp

    def _split_header(self, name, text):
        """Return text folded so that no line exceeds the maximum length.

        text is the complete `Name: value' header.  name is accepted for
        interface compatibility but is not used.  If no physical line of
        text is too long, text is returned unchanged.
        """
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
        lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
        for line in lines:
            if len(line) > maxheaderlen:
                break
        else:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
        # The `text' argument already has the field name prepended, so don't
        # provide it here or the first line will get folded too short.
        # For backwards compatibility, we use a hard tab as the continuation
        # whitespace.
        h = Header(text, maxlinelen=maxheaderlen, continuation_ws='\t')
        return h.encode()

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg.

        A payload of None writes nothing.  If the message has a charset,
        the payload is first passed through its body_encode().  Raises
        TypeError if the resulting payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, StringType):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the body of a multipart message.

        Each subpart is flattened with a clone of this generator and the
        results are joined with boundary lines, surrounded by the message's
        preamble and epilogue when those are not None.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=_make_boundary())
            # If the boundary had to be generated, record it on the message
            # (as the non-empty path below does) so that the Content-Type:
            # header, which is written after this handler returns, names the
            # boundary actually used in the body.
            if msg.get_boundary() != boundary:
                msg.set_boundary(boundary)
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=0)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        # NOTE(review): the generated boundary is passed only as failobj, so
        # presumably a boundary already set on the message is used as-is and
        # is not checked against alltext -- confirm against get_boundary().
        boundary = msg.get_boundary(failobj=_make_boundary(alltext))
        # If we had to calculate a new boundary because the body text
        # contained that string, set the new boundary.  We don't do it
        # unconditionally because, while set_boundary() preserves order, it
        # doesn't preserve newlines/continuations in headers.  This is no big
        # deal in practice, but turns out to be inconvenient for the unittest
        # suite.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the closing
        # boundary; the epilogue code below decides whether to add one.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each block of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=0)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the message contained in a message/* part."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=0)
        self._fp.write(s.getvalue())

280

281

Barry Warsaw

e968ead

2001-10-04 17:05:11 +0000

[diff] [blame]

282

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

283

class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            fmt = ('[Non-text (%(type)s) part of message omitted, '
                   'filename %(filename)s]')
        self._fmt = fmt

    def clone(self, fp):
        """Clone this generator with the exact same options, fmt included."""
        # The base class rebuilds the instance from its own three
        # constructor arguments only, which would silently reset the format
        # string to the default; carry it over explicitly.
        twin = Generator.clone(self, fp)
        twin._fmt = self._fmt
        return twin

    def _dispatch(self, msg):
        """Write the decoded text parts of msg and a placeholder line for
        every other non-multipart part.
        """
        # `print >> self' goes through self.write(), i.e. to whatever file
        # object is currently installed as the output.
        for part in msg.walk():
            # A part with no main type is treated as text
            maintype = part.get_main_type('text')
            if maintype == 'text':
                print >> self, part.get_payload(decode=1)
            elif maintype == 'multipart':
                # Just skip this; walk() visits the subparts themselves
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_type('[no MIME type]'),
                    'maintype'   : part.get_main_type('[no main MIME type]'),
                    'subtype'    : part.get_subtype('[no sub-MIME type]'),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }

Barry Warsaw

2001-10-04 17:05:11 +0000

[diff] [blame]

338

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

339

# Helper

Barry Warsaw

2002-04-10 21:01:31 +0000

[diff] [blame]

340

def _make_boundary(text=None):

Barry Warsaw