Blame - Lib/email/generator.py - platform/external/python/cpython2

Benjamin Peterson

46a9900

2010-01-09 18:45:30 +0000

[diff] [blame]

1

Guido van Rossum

8b3febe

2007-08-30 01:15:14 +0000

[diff] [blame]

2

# Author: Barry Warsaw

3

# Contact: email-sig@python.org

4

5

"""Classes to generate plain text from a message object tree."""

6

7

# Public API of this module.  BytesGenerator is a public class defined below
# and must be exported alongside the two text generators.
__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']

import re

import sys

import time

import random

import warnings

R. David Murray

2010-10-08 15:55:28 +0000

[diff] [blame^]

15

from io import StringIO, BytesIO

Guido van Rossum

8b3febe

2007-08-30 01:15:14 +0000

[diff] [blame]

16

from email.header import Header

R. David Murray

96fd54e

2010-10-08 15:55:28 +0000

[diff] [blame^]

17

from email.message import _has_surrogates

Guido van Rossum

8b3febe

2007-08-30 01:15:14 +0000

[diff] [blame]

# Joins the MIME main type and subtype when building a handler method name.
UNDERSCORE = "_"

# Line separator appended to everything written through Generator.write().
NL = "\n"

# Matches "From " at the start of any line of a body, so such lines can be
# escaped with a leading ">" when From_ mangling is enabled.
fcre = re.compile(r'^From ', flags=re.MULTILINE)

class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self.write(ufrom + NL)
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    # We use these class constants when we need to manipulate data that has
    # already been written to a buffer (ex: constructing a re to check the
    # boundary), and the module level NL constant when adding new output to a
    # buffer via self.write, because 'write' always takes strings.
    # Having write always take strings makes the code simpler, but there are
    # a few occasions when we need to write previously created data back
    # to the buffer or to a new buffer; for those cases we use self._fp.write.
    _NL = NL
    _EMPTY = ''

    def _new_buffer(self):
        """Return a fresh in-memory buffer of the type this generator emits."""
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _write(self, msg):
        """Write msg: its body is rendered first, then headers, then body."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        # The buffered body has already been through write(), so it goes
        # straight to the file object.
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific _handle_*() method available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg followed by the blank separator line."""
        for h, v in msg.items():
            self.write('%s: ' % h)
            if isinstance(v, Header):
                self.write(v.encode(maxlinelen=self._maxheaderlen)+NL)
            else:
                # Header's got lots of smarts, so use it.
                header = Header(v, maxlinelen=self._maxheaderlen,
                                header_name=h)
                self.write(header.encode()+NL)
        # A blank line always separates headers from body
        self.write(NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, escaping From_ lines when requested.

        Does nothing when the payload is None.  Raises TypeError when the
        payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            # The raw payload carries surrogates; when a charset is declared,
            # drop the transfer encoding and let set_payload() re-apply the
            # charset, then use the payload it produces.
            charset = msg.get_param('charset')
            if charset is not None:
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited subparts, epilogue."""
        # Each subpart is flattened into its own buffer first, so that a
        # boundary (if one has to be invented) can be checked against the
        # complete rendered text.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        # A boundary already present on the message is used as-is (it is not
        # checked against the body).  Only when the message has none do we
        # pay for joining the parts and crafting one that doesn't appear in
        # any of the message texts.  We don't call set_boundary()
        # unconditionally because, while it preserves order, it doesn't
        # preserve newlines/continuations in headers.  This is no big deal in
        # practice, but turns out to be inconvenient for the unittest suite.
        boundary = msg.get_boundary()
        if boundary is None:
            alltext = self._NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing newline
        if msg.preamble is not None:
            self.write(msg.preamble + NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write('\n--' + boundary + NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            self.write(NL)
            self.write(msg.epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed body with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split(self._NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._EMPTY:
                blocks.append(self._NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._NL.join(blocks))

    def _handle_message(self, msg):
        """Write the body of a message/* part."""
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(msg.get_payload(0), unixfrom=False)
            # Already rendered by the clone's write(): bypass our write().
            self._fp.write(s.getvalue())
        else:
            # Raw source data: it must go through write() so that a subclass
            # whose buffer takes another type (e.g. bytes) can convert it.
            self.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string.

        If text is given, the returned boundary does not occur in it as a
        delimiter line ('--boundary' or '--boundary--').
        """
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
            if not cre.search(text):
                break
            # Collision: try the same boundary with a numeric suffix.
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile pattern s for searching text produced by this class."""
        return re.compile(s, flags)

324

325

326

class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.

    The outfp object must accept bytes in its write method.
    """

    # Bytes versions of these constants for use in manipulating data from
    # the BytesIO buffer.
    _NL = NL.encode('ascii')
    _EMPTY = b''

    def write(self, s):
        """Encode the string s and write the resulting bytes to the output."""
        # 'surrogateescape' turns surrogate code points back into the raw
        # bytes they stand for.
        self._fp.write(s.encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        """Return a fresh BytesIO buffer for rendering a subpart."""
        return BytesIO()

    def _write_headers(self, msg):
        """Write every header of msg followed by the blank separator line."""
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.
        for h, v in msg._headers:
            self.write('%s: ' % h)
            if isinstance(v, Header):
                self.write(v.encode(maxlinelen=self._maxheaderlen)+NL)
            elif _has_surrogates(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                self.write(v+NL)
            else:
                # Header's got lots of smarts and this string is safe...
                header = Header(v, maxlinelen=self._maxheaderlen,
                                header_name=h)
                self.write(header.encode()+NL)
        # A blank line always separates headers from body
        self.write(NL)

    def _handle_text(self, msg):
        """Write a text payload, passing surrogate-escaped data through as-is.

        A missing (None) or non-string payload is left to the base class,
        which ignores None and raises TypeError for anything that is not a
        string.
        """
        payload = msg._payload
        # Only probe for surrogates once we know we hold a string; the base
        # implementation deals with None and with wrong payload types.
        if isinstance(payload, str) and _has_surrogates(payload):
            # If the string has surrogates the original source was bytes, so
            # just write it back out -- but still honour mangle_from_, as the
            # base class does for ordinary text.
            if self._mangle_from_:
                payload = fcre.sub('>From ', payload)
            self.write(payload)
        else:
            super()._handle_text(msg)

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile s as a bytes pattern, since the searched text is bytes."""
        return re.compile(s.encode('ascii'), flags)

381

Guido van Rossum

8b3febe

2007-08-30 01:15:14 +0000

[diff] [blame]

382

383

384

# Default template DecodedGenerator substitutes for a non-text part; it is
# expanded with %(keyword)s-style mapping keys.
_FMT = ('[Non-text (%(type)s) part of message omitted, '
        'filename %(filename)s]')

385

386

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the payload of the subpart exactly as
        get_payload(decode=False) returns it.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        # Fall back to the module-level template when no fmt is supplied.
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Print every part of msg: text verbatim, the rest as a placeholder."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers produce no output of their own; walk() visits
                # their children separately.
                continue
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
                continue
            # Any other part is replaced by the expanded format string.
            fields = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
            }
            print(self._fmt % fields, file=self)

R. David Murray

2010-10-08 15:55:28 +0000

[diff] [blame^]

442

# Helper used by Generator._make_boundary: a zero-padded decimal format whose
# width is the number of digits in sys.maxsize - 1, the largest token that
# random.randrange(sys.maxsize) can produce.
_width = len(str(sys.maxsize - 1))
_fmt = '%%0%dd' % _width

# Backward compatibility: keep the old module-level function name available
# as an alias for the classmethod.
_make_boundary = Generator._make_boundary