Blame - Lib/email/Generator.py - platform/external/python/cpython3

2002-04-10 21:01:31 +0000

[diff] [blame]

1

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

2

# Author: barry@zope.com (Barry Warsaw)

3

4

"""Classes to generate plain text from a message object tree.

5

"""

6

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

7

import re

Barry Warsaw

db6888b

2003-05-29 19:39:33 +0000

[diff] [blame]

8

import sys

Barry Warsaw

5d384ef

2003-03-06 05:22:02 +0000

[diff] [blame]

9

import time

10

import locale

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

11

import random

12

Barry Warsaw

6c2bc46

2002-10-14 15:09:30 +0000

[diff] [blame]

13

from types import ListType, StringType

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

14

from cStringIO import StringIO

15

Barry Warsaw

062749a

2002-06-28 23:41:42 +0000

[diff] [blame]

16

from email.Header import Header

Barry Warsaw

5d384ef

2003-03-06 05:22:02 +0000

[diff] [blame]

17

from email.Parser import NLCRE

Barry Warsaw

062749a

2002-06-28 23:41:42 +0000

[diff] [blame]

18

Barry Warsaw

2002-09-10 16:13:45 +0000

[diff] [blame]

19

try:

20

from email._compat22 import _isstring

21

except SyntaxError:

22

from email._compat21 import _isstring

23

Barry Warsaw

2002-09-28 18:04:55 +0000

[diff] [blame]

try:

True, False

except NameError:

True = 1

False = 0

Barry Warsaw

2002-09-10 16:13:45 +0000

[diff] [blame]

29

Barry Warsaw

d1eeecb

2001-10-17 20:51:42 +0000

[diff] [blame]

30

# Small string fragments used when assembling message text.
EMPTYSTRING = ""
SEMISPACE = "; "
BAR = "|"
UNDERSCORE = "_"
NL = "\n"
NLTAB = NL + "\t"
SEMINLTAB = ";" + NLTAB
SPACE8 = 8 * " "

# Matches "From " at the start of any line of a multi-line string; used to
# escape such lines in message bodies.
fcre = re.compile("^From ", re.M)

40

Barry Warsaw

6c2bc46

2002-10-14 15:09:30 +0000

[diff] [blame]

41

def _is8bitstring(s):

42

if isinstance(s, StringType):

43

try:

44

unicode(s, 'us-ascii')

except UnicodeError:

return True

return False

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

49

Barry Warsaw

e968ead

2001-10-04 17:05:11 +0000

[diff] [blame]

50

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

51

class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  Plain string header values are handed to email.Header.Header
        with this value as maxlinelen and a tab as the continuation
        whitespace.  Header instances encode themselves, and byte strings
        containing 8-bit data are written unwrapped.  Set to zero to disable
        wrapping headers.  Default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # No envelope line recorded on the message; synthesize one
                # stamped with the current time.
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in method names, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    def _resolve_boundary(self, msg, text=None):
        # Return the boundary to use for msg.  The message's own boundary is
        # used when it has one; otherwise a fresh one is made (avoiding
        # `text' when given) and stored on the message so that the
        # Content-Type: header written later agrees with the body.  We don't
        # call set_boundary() unconditionally because, while it preserves
        # order, it doesn't preserve newlines/continuations in headers.
        boundary = msg.get_boundary(failobj=_make_boundary(text))
        if msg.get_boundary() != boundary:
            msg.set_boundary(boundary)
        return boundary

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        for h, v in msg.items():
            # The trailing comma keeps the value on the same line as the
            # header name.
            print >> self._fp, '%s:' % h,
            if self.__maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self.__maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        # Write a string payload, applying the charset's body encoding and
        # From_ mangling when configured.  Raises TypeError if the payload is
        # not a string.
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not _isstring(payload):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached.  Record the boundary on the
            # message too, so the header and the body cannot disagree.
            boundary = self._resolve_boundary(msg)
            print >> self._fp, '--' + boundary
            # print adds its own newline after the '\n' given here.
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif _isstring(subparts):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # Now make sure the boundary we've selected doesn't appear in any of
        # the message texts.
        alltext = NL.join(msgtexts)
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = self._resolve_boundary(msg, alltext)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
            # If preamble is the empty string, the length of the split will be
            # 1, but the last element will be the empty string.  If it's
            # anything else but does not end in a line separator, the length
            # will be > 1 and not end in an empty string.  We need to
            # guarantee a newline after the preamble, but don't add too many.
            plines = NLCRE.split(msg.preamble)
            if plines != [''] and plines[-1] != '':
                self._fp.write('\n')
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # The trailing comma suppresses the newline after the closing
        # boundary; the epilogue handling below decides whether to add one.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=False)
        self._fp.write(s.getvalue())

300

301

Barry Warsaw

e968ead

2001-10-04 17:05:11 +0000

[diff] [blame]

302

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

303

class DecodedGenerator(Generator):
    """Generate a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.
        Subparts of main type `multipart' are skipped.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            fmt = ('[Non-text (%(type)s) part of message omitted, '
                   'filename %(filename)s]')
        self._fmt = fmt

    def _dispatch(self, msg):
        for part in msg.walk():
            # Use the same content-type accessors as Generator._dispatch()
            # so both classes classify a part identically.
            maintype = part.get_content_maintype()
            if maintype == 'text':
                # Printing to self goes through self.write().
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                subtype = part.get_content_subtype()
                print >> self, self._fmt % {
                    'type'       : maintype + '/' + subtype,
                    'maintype'   : maintype,
                    'subtype'    : subtype,
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }

Barry Warsaw

2001-10-04 17:05:11 +0000

[diff] [blame]

358

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

359

# Helper

Barry Warsaw

db6888b

2003-05-29 19:39:33 +0000

[diff] [blame]

360

_width = len(repr(sys.maxint-1))

361

_fmt = '%%0%dd' % _width

362

Barry Warsaw

2002-04-10 21:01:31 +0000

[diff] [blame]

363

def _make_boundary(text=None):

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

364

# Craft a random boundary. If text is given, ensure that the chosen

365

# boundary doesn't appear in the text.

Barry Warsaw

663219a

2003-06-24 20:19:34 +0000

[diff] [blame]

366

token = random.randrange(sys.maxint)

Barry Warsaw

db6888b

2003-05-29 19:39:33 +0000

[diff] [blame]

367

boundary = ('=' * 15) + (_fmt % token) + '=='

Barry Warsaw

2001-09-23 03:17:28 +0000

[diff] [blame]

if text is None:

return boundary

b = boundary

counter = 0

Barry Warsaw

2002-09-28 18:04:55 +0000

[diff] [blame]

372

while True:

Barry Warsaw