Blame - Lib/email/generator.py - platform/external/python/cpython3

2007-08-30 01:15:14 +0000

[diff] [blame]

2

# Author: Barry Warsaw

3

# Contact: email-sig@python.org

4

5

"""Classes to generate plain text from a message object tree."""

6

7

# Public API of this module.  BytesGenerator is a public class defined below
# and is therefore exported alongside the two text generators.
__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']

import re

import sys

import time

import random

import warnings

R. David Murray

2010-10-08 15:55:28 +0000

[diff] [blame]

15

from io import StringIO, BytesIO

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

16

from email.header import Header

R. David Murray

2010-10-08 15:55:28 +0000

[diff] [blame]

17

from email.message import _has_surrogates

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

18

19

# Separator used to build handler method names from MIME type components.
UNDERSCORE = '_'

# Default line separator.  Kept at module level for backward compatibility;
# generator methods should use the linesep given to flatten() (self._NL).
NL = '\n'

# Matches "From " at the start of any line of a body so that it can be
# escaped (mangled) with a leading '>'.
fcre = re.compile(r'^From ', flags=re.MULTILINE)

class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a '>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep='\n'):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a 'standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.
        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        self._NL = linesep
        self._encoded_NL = self._encode(linesep)
        self._EMPTY = ''
        # This attribute used to be misspelled (_encoded_EMTPY), so readers of
        # _encoded_EMPTY only ever saw the class-level default.
        self._encoded_EMPTY = self._encode('')
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self.write(ufrom + self._NL)
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    # Similarly, we have _XXX and _encoded_XXX attributes that are used on
    # source and buffer data, respectively.
    _encoded_EMPTY = ''

    def _new_buffer(self):
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        # Emit every header as "Name: value" using the current line
        # separator, followed by the blank line that ends the header block.
        for h, v in msg.items():
            self.write('%s: ' % h)
            if isinstance(v, Header):
                self.write(v.encode(
                    maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
            else:
                # Header's got lots of smarts, so use it.
                header = Header(v, maxlinelen=self._maxheaderlen,
                                header_name=h)
                self.write(header.encode(linesep=self._NL)+self._NL)
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        # Write a string payload, re-encoding it via the declared charset when
        # it carries surrogates, and escaping From_ lines when requested.
        # Raises TypeError if the payload is neither None nor a str.
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # Drop the old transfer encoding so that set_payload() can
                # pick one appropriate for the charset.
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            # Keep the generated value in the local variable as well: it is
            # used for every delimiter written below.  Previously the local
            # stayed None here and the concatenations raised TypeError.
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            self.write(msg.preamble + self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--')
        if msg.epilogue is not None:
            self.write(self._NL)
            self.write(msg.epilogue)

    def _handle_multipart_signed(self, msg):
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        elif isinstance(payload, str):
            # We write straight to the buffer below, bypassing self.write(),
            # so convert the text to the buffer's native type first (a no-op
            # here, bytes in BytesGenerator).
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            # Look for the candidate used as a delimiter ("--b") or as a
            # close-delimiter ("--b--") on a line of its own.
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
            if not cre.search(text):
                break
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        # BytesGenerator overrides this to compile a bytes pattern.
        return re.compile(s, flags)

328

329

330

class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.

    The outfp object must accept bytes in its write method.
    """

    # Bytes versions of this constant for use in manipulating data from
    # the BytesIO buffer.
    _encoded_EMPTY = b''

    def write(self, s):
        """Encode the string s to bytes and write it to the output object.

        Surrogate-escaped characters are turned back into the original bytes.
        """
        self._fp.write(s.encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        return BytesIO()

    def _encode(self, s):
        return s.encode('ascii')

    def _write_headers(self, msg):
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.
        # NOTE(review): this reads msg._headers directly rather than
        # msg.items() - presumably to see the raw header values; confirm.
        for h, v in msg._headers:
            self.write('%s: ' % h)
            if isinstance(v, Header):
                # Honor the linesep passed to flatten(), like the other
                # branches and the base class, instead of the module-level
                # '\n' constant.
                self.write(v.encode(maxlinelen=self._maxheaderlen,
                                    linesep=self._NL) + self._NL)
            elif _has_surrogates(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                self.write(v + self._NL)
            else:
                # Header's got lots of smarts and this string is safe...
                header = Header(v, maxlinelen=self._maxheaderlen,
                                header_name=h)
                self.write(header.encode(linesep=self._NL) + self._NL)
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        # A missing payload produces no body, exactly as in the base class;
        # checking here keeps _has_surrogates() from being handed None.
        if msg._payload is None:
            return
        # If the string has surrogates the original source was bytes, so
        # just write it back out.
        if _has_surrogates(msg._payload):
            self.write(msg._payload)
        else:
            super(BytesGenerator, self)._handle_text(msg)

    @classmethod
    def _compile_re(cls, s, flags):
        # The text being searched is bytes here, so the pattern must be too.
        return re.compile(s.encode('ascii'), flags)

387

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

388

389

390

# Default template used by DecodedGenerator in place of a non-text part; it
# is expanded with %(keyword)s substitutions describing that part.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

391

392

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Behaves like the Generator base class, but every non-text part is
    replaced in the output by a format string describing that part.
    """

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() with one extra optional argument, fmt.

        All subparts of a message are visited.  For a subpart whose main
        type is 'text', its payload is printed.

        For any other non-multipart subpart, fmt is printed instead of the
        payload, after being expanded with these keywords (in %(keyword)s
        format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this template is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        # Visit the whole tree; each leaf is printed through self.write().
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Just skip this
                continue
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
                continue
            # Anything else is summarized using the format template.
            substitutions = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print(self._fmt % substitutions, file=self)

R. David Murray

2010-10-08 15:55:28 +0000

[diff] [blame]

448

# Helper used by Generator._make_boundary

Christian Heimes

a37d4c6

2007-12-04 23:02:19 +0000

[diff] [blame]

449

# Number of decimal digits in sys.maxsize-1; used as the zero-padded field
# width of the random token placed in generated MIME boundaries.
_width = len(repr(sys.maxsize-1))

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

450

# Builds a '%0<width>d' format string, so every boundary token is rendered
# zero-padded to the same number of digits.
_fmt = '%%0%dd' % _width

451

R. David Murray