Blame - Lib/email/Generator.py - platform/external/python/cpython2

2002-04-10 21:01:31 +0000

[diff] [blame]

1

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

2

# Author: barry@zope.com (Barry Warsaw)

3

4

"""Classes to generate plain text from a message object tree.

"""

import time

import re

import random

from types import ListType, StringType

12

from cStringIO import StringIO

13

Barry Warsaw

2001-10-17 20:51:42 +0000

[diff] [blame]

14

# String constants shared by the header-folding and multipart code below.
EMPTYSTRING = ''
UNDERSCORE = '_'
BAR = '|'
SEMISPACE = '; '
SPACE8 = ' ' * 8

# Newline-based separators; the folding separators are built on top of NL.
NL = '\n'
NLTAB = NL + '\t'
SEMINLTAB = ';' + NLTAB

# Matches "From " at the start of any line of a body.  Such lines are
# escaped with a leading `>' when From_ mangling is switched on.
fcre = re.compile(r'^From ', re.MULTILINE)

24

25

Barry Warsaw

e968ead

2001-10-04 17:05:11 +0000

[diff] [blame]

26

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

27

class Generator:

28

"""Generates output from a Message object tree.

29

30

This basic generator writes the message to the given file object as plain

text.

"""

#

# Public interface

#

def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):

38

"""Create the generator for message flattening.

39

40

outfp is the output file-like object for writing the message to. It

41

must have a write() method.

42

43

Optional mangle_from_ is a flag that, when true, escapes From_ lines

44

in the body of the message by putting a `>' in front of them.

45

46

Optional maxheaderlen specifies the longest length for a non-continued

47

header. When a header line is longer (in characters, with tabs

48

expanded to 8 spaces), than maxheaderlen, the header will be broken on

49

semicolons and continued as per RFC 2822. If no semicolon is found,

50

then the header is left alone. Set to zero to disable wrapping

51

headers. Default is 78, as recommended (but not required by RFC

2822.

"""

self._fp = outfp

self._mangle_from_ = mangle_from_

56

self.__first = 1

57

self.__maxheaderlen = maxheaderlen

58

59

def write(self, s):

60

# Just delegate to the file object

61

self._fp.write(s)

62

Barry Warsaw

2002-06-02 19:02:37 +0000

[diff] [blame^]

63

def flatten(self, msg, unixfrom=0):

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

64

"""Print the message object tree rooted at msg to the output file

65

specified when the Generator instance was created.

66

67

unixfrom is a flag that forces the printing of a Unix From_ delimiter

68

before the first object in the message tree. If the original message

69

has no From_ delimiter, a `standard' one is crafted. By default, this

70

is 0 to inhibit the printing of any From_ delimiter.

71

72

Note that for subobjects, no From_ line is printed.

73

"""

74

if unixfrom:

75

ufrom = msg.get_unixfrom()

76

if not ufrom:

77

ufrom = 'From nobody ' + time.ctime(time.time())

78

print >> self._fp, ufrom

79

self._write(msg)

80

Barry Warsaw

2002-06-02 19:02:37 +0000

[diff] [blame^]

81

# For backwards compatibility, but this is slower

82

__call__ = flatten

83

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

84

#

85

# Protected interface - undocumented ;/

86

#

87

88

def _write(self, msg):

89

# We can't write the headers yet because of the following scenario:

90

# say a multipart message includes the boundary string somewhere in

91

# its body. We'd have to calculate the new boundary /before/ we write

92

# the headers so that we can write the correct Content-Type:

93

# parameter.

94

#

95

# The way we do this, so as to make the _handle_*() methods simpler,

96

# is to cache any subpart writes into a StringIO. The we write the

97

# headers and the StringIO contents. That way, subpart handlers can

98

# Do The Right Thing, and can still modify the Content-Type: header if

# necessary.

oldfp = self._fp

try:

self._fp = sfp = StringIO()

self._dispatch(msg)

finally:

self._fp = oldfp

# Write the headers. First we see if the message object wants to

107

# handle that itself. If not, we'll do it generically.

108

meth = getattr(msg, '_write_headers', None)

109

if meth is None:

110

self._write_headers(msg)

111

else:

112

meth(self)

113

self._fp.write(sfp.getvalue())

114

115

def _dispatch(self, msg):

116

# Get the Content-Type: for the message, then try to dispatch to

117

# self._handle_maintype_subtype(). If there's no handler for the full

118

# MIME type, then dispatch to self._handle_maintype(). If that's

119

# missing too, then dispatch to self._writeBody().

120

ctype = msg.get_type()

121

if ctype is None:

122

# No Content-Type: header so try the default handler

123

self._writeBody(msg)

124

else:

125

# We do have a Content-Type: header.

126

specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')

127

meth = getattr(self, '_handle_' + specific, None)

128

if meth is None:

129

generic = msg.get_main_type().replace('-', '_')

130

meth = getattr(self, '_handle_' + generic, None)

131

if meth is None:

132

meth = self._writeBody

meth(msg)

#

# Default handlers

#

def _write_headers(self, msg):
    # Write every header of msg to the output file, folding the long
    # ones, followed by the blank line that ends the header block.
    limit = self.__maxheaderlen
    for name, value in msg.items():
        # The MIME-Version: header is only meant to be written for the
        # outermost container message.  Unfortunately, the technique
        # used for the Unix-From line can't be reused here because we
        # don't know when MIME-Version: will occur.
        if name.lower() == 'mime-version' and not self.__first:
            continue
        line = '%s: %s' % (name, value)
        # RFC 2822 says that lines SHOULD be no more than maxheaderlen
        # characters wide, so we're well within our rights to split long
        # headers.  A limit of zero or less switches folding off.
        if 0 < limit < len(line):
            line = self._split_header(line)
        print >> self._fp, line
    # A blank line always separates headers from body
    print >> self._fp

156

157

def _split_header(self, text):
    # Fold the header in text (which may already contain continuation
    # lines) so that its lines fit within the maximum header length, and
    # return the folded text.  Lines are broken on semicolons when
    # possible, otherwise on whitespace.
    limit = self.__maxheaderlen
    # Find out whether any line in the header is really wider than the
    # limit once hard tabs are counted as 8 spaces.  There could be
    # continuation lines that already keep every line short enough.
    overlong = [s for s in text.split('\n')
                if len(s.replace('\t', SPACE8)) > limit]
    if not overlong:
        # Nothing to fold, so return the original unchanged.
        return text
    folded = []
    for line in text.split('\n'):
        # Short lines can remain unchanged
        if len(line.replace('\t', SPACE8)) <= limit:
            folded.append(line)
            continue
        chunks = []
        startlen = len(line)
        # Try to break the line on semicolons first.  Each pass cuts at
        # the last semicolon that lies before the limit.
        while len(line) > limit:
            semi = line.rfind(';', 0, limit)
            if semi < 0:
                break
            chunks.append(line[:semi])
            line = line[semi+1:].lstrip()
        if len(line) != startlen:
            # Splitting on semis worked
            chunks.append(line)
            folded.append(SEMINLTAB.join(chunks))
            continue
        # Splitting on semis didn't help, so try to split on folding
        # whitespace.  The capturing group keeps the whitespace runs, so
        # tokens alternates word, whitespace, word, ...
        tokens = re.split(r'(\s+)', line)
        # Watch out though for "Header: longnonsplittableline"
        if tokens[0].endswith(':') and len(tokens) == 3:
            folded.append(line)
            continue
        current = [tokens.pop(0)]
        width = len(current[0])
        while tokens:
            # Consume the next whitespace run together with its word.
            gap, word = tokens[0], tokens[1]
            del tokens[:2]
            if width + len(gap) + len(word) < limit:
                current.extend([gap, word])
                width += len(gap) + len(word)
            else:
                # Split it here; the whitespace run in between is
                # dropped and the word starts the next line.
                chunks.append(EMPTYSTRING.join(current))
                current = [word]
                width = len(word)
        chunks.append(EMPTYSTRING.join(current))
        folded.append(NLTAB.join(chunks))
    return NL.join(folded)

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

220

221

#

222

# Handlers for writing types and subtypes

223

#

224

225

def _handle_text(self, msg):
    # Write the payload of msg to the output file.  A message that has no
    # payload at all writes nothing.  Raises TypeError when the payload
    # (after any charset encoding) is not a string.
    body = msg.get_payload()
    if body is None:
        return
    # When a charset is attached to the message, let it encode the body.
    charset = msg.get_charset()
    if charset is not None:
        body = charset.body_encode(body)
    if not isinstance(body, StringType):
        raise TypeError('string payload expected: %s' % type(body))
    # Escape lines starting with "From " when mangling is switched on.
    if self._mangle_from_:
        body = fcre.sub('>From ', body)
    self._fp.write(body)

# Default body handler
_writeBody = _handle_text

240

241

def _handle_multipart(self, msg, isdigest=0):
    # Write a multipart message to the output file: any preamble, the
    # subparts separated by boundary lines, the closing boundary and any
    # epilogue.  isdigest is true for multipart/digest, which puts an
    # extra blank line after each opening boundary line.
    #
    # The trick here is to write out each part separately, merge them all
    # together, and then make sure that the boundary we've chosen isn't
    # present in the payload.
    msgtexts = []
    subparts = msg.get_payload()
    if subparts is None:
        # Nothing has ever been attached
        boundary = msg.get_boundary(failobj=_make_boundary())
        # If the message had no boundary of its own, record the generated
        # one on the message, exactly as is done for non-empty multiparts
        # below.  The headers are written after this handler returns, so
        # without this the Content-Type: header would not carry the
        # boundary used in the delimiter lines.
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        print >> self._fp, '--' + boundary
        # print appends its own newline, so this emits two newlines
        # between the opening and the closing delimiter.
        print >> self._fp, '\n'
        print >> self._fp, '--' + boundary + '--'
        return
    elif not isinstance(subparts, ListType):
        # Scalar payload
        subparts = [subparts]
    for part in subparts:
        # Each subpart is flattened by a fresh generator of the same class
        # into its own buffer.
        s = StringIO()
        g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
        g.flatten(part, unixfrom=0)
        msgtexts.append(s.getvalue())
    # Now make sure the boundary we've selected doesn't appear in any of
    # the message texts.
    alltext = NL.join(msgtexts)
    # BAW: What about boundaries that are wrapped in double-quotes?
    boundary = msg.get_boundary(failobj=_make_boundary(alltext))
    # If we had to calculate a new boundary, set it on the message.  We
    # don't do it unconditionally because, while set_boundary() preserves
    # order, it doesn't preserve newlines/continuations in headers.  This
    # is no big deal in practice, but turns out to be inconvenient for the
    # unittest suite.
    if msg.get_boundary() != boundary:
        msg.set_boundary(boundary)
    # Write out any preamble
    if msg.preamble is not None:
        self._fp.write(msg.preamble)
    # First boundary is a bit different; it doesn't have a leading extra
    # newline.
    print >> self._fp, '--' + boundary
    if isdigest:
        print >> self._fp
    # Join and write the individual parts
    joiner = '\n--' + boundary + '\n'
    if isdigest:
        # multipart/digest types effectively add an extra newline between
        # the boundary and the body part.
        joiner += '\n'
    self._fp.write(joiner.join(msgtexts))
    # The trailing comma keeps print from adding a newline after the
    # closing boundary.
    print >> self._fp, '\n--' + boundary + '--',
    # Write out any epilogue
    if msg.epilogue is not None:
        if not msg.epilogue.startswith('\n'):
            print >> self._fp
        self._fp.write(msg.epilogue)

296

297

def _handle_multipart_digest(self, msg):

298

self._handle_multipart(msg, isdigest=1)

299

Barry Warsaw

2001-09-26 05:32:41 +0000

[diff] [blame]

300

def _handle_message_delivery_status(self, msg):
    # We can't just write the headers directly to self's file object
    # because this will leave an extra newline between the last header
    # block and the boundary.  Sigh.  Instead, every element of the
    # payload is flattened by a fresh generator of the same class into
    # its own buffer first.
    chunks = []
    for block in msg.get_payload():
        buf = StringIO()
        sub = self.__class__(buf, self._mangle_from_, self.__maxheaderlen)
        sub.flatten(block, unixfrom=0)
        flat = buf.getvalue()
        # Strip off the unnecessary trailing empty line
        if flat.endswith('\n'):
            flat = flat[:-1]
        chunks.append(flat)
    # Now join all the blocks with a newline.  This has the lovely effect
    # of separating each block with an empty line, but not adding an
    # extra one after the last one.
    self._fp.write(NL.join(chunks))

320

321

def _handle_message(self, msg):
    # The payload of a message/rfc822 part should be a multipart sequence
    # of length 1.  The zeroth element of the list should be the Message
    # object for the subpart.  Extract that object, stringify it with a
    # fresh generator of the same class, and write that out.
    buf = StringIO()
    sub = self.__class__(buf, self._mangle_from_, self.__maxheaderlen)
    embedded = msg.get_payload(0)
    sub.flatten(embedded, unixfrom=0)
    self._fp.write(buf.getvalue())

330

331

Barry Warsaw

e968ead

2001-10-04 17:05:11 +0000

[diff] [blame]

332

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

333

class DecodedGenerator(Generator):

334

"""Generate a text representation of a message.

335

336

Like the Generator base class, except that non-text parts are substituted

337

with a format string representing the part.

338

"""

339

def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
    """Like Generator.__init__() except that one more optional argument,
    fmt, is accepted.

    This generator walks through all subparts of a message.  For a
    subpart of main type `text', the decoded payload of the subpart is
    printed.

    For any other subpart that is not a multipart container, fmt is a
    format string that is printed instead of the payload.  fmt is
    expanded with the following keywords (in %(keyword)s format):

    type       : Full MIME type of the non-text part
    maintype   : Main MIME type of the non-text part
    subtype    : Sub-MIME type of the non-text part
    filename   : Filename of the non-text part
    description: Description associated with the non-text part
    encoding   : Content transfer encoding of the non-text part

    The default value for fmt is None, meaning

    [Non-text (%(type)s) part of message omitted, filename %(filename)s]
    """
    Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
    if fmt is not None:
        self._fmt = fmt
    else:
        # No format given, so fall back on the stock placeholder.
        self._fmt = ('[Non-text (%(type)s) part of message omitted, '
                     'filename %(filename)s]')

366

367

def _dispatch(self, msg):
    # Visit every part that msg.walk() yields.  Text parts have their
    # decoded payload printed, multipart parts are skipped, and anything
    # else is replaced by the format string.  The output is printed to
    # self, so it goes through self.write().
    for part in msg.walk():
        # NOTE(review): the 'text' argument is presumably the fallback
        # for parts without a main type -- confirm against Message.
        maintype = part.get_main_type('text')
        if maintype == 'multipart':
            # Just skip this
            continue
        if maintype == 'text':
            print >> self, part.get_payload(decode=1)
            continue
        # Values substituted into the format string for a non-text part,
        # each with a placeholder for when the part lacks the value.
        fields = {
            'type'       : part.get_type('[no MIME type]'),
            'maintype'   : part.get_main_type('[no main MIME type]'),
            'subtype'    : part.get_subtype('[no sub-MIME type]'),
            'filename'   : part.get_filename('[no filename]'),
            'description': part.get('Content-Description',
                                    '[no description]'),
            'encoding'   : part.get('Content-Transfer-Encoding',
                                    '[no encoding]'),
            }
        print >> self, self._fmt % fields

Barry Warsaw

2001-10-04 17:05:11 +0000

[diff] [blame]

388

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

389

# Helper

Barry Warsaw

2002-04-10 21:01:31 +0000

[diff] [blame]

390

def _make_boundary(text=None):

Barry Warsaw