Blame - Lib/email/header.py - platform/external/python/cpython2

2007-08-30 01:15:14 +0000

[diff] [blame]

1

2

# Author: Ben Gertzfield, Barry Warsaw

3

# Contact: email-sig@python.org

4

5

"""Header encoding and decoding functionality."""

__all__ = [

'Header',

'decode_header',

'make_header',

]

import re

import binascii

import email.quoprimime

17

import email.base64mime

18

19

from email.errors import HeaderParseError

20

from email.charset import Charset

# Small string/bytes constants shared by the helpers in this module.
NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
# Default maximum line length used by Header when none is given.
MAXLINELEN = 78

USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  (?=[ \t]|$)           # whitespace or the end of the string
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (octal
# 041) to tilde (octal 176).  For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.
_embeded_header = re.compile(r'\n[^ \t]+:')

# Helpers
_max_append = email.quoprimime._max_append

def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs, one per run of the header.
    charset is None for runs that were not RFC 2047 encoded-words; otherwise
    it is the lower-cased character set name taken from the encoded-word.
    When the header contains no encoded-word at all, the header is returned
    unchanged as the single pair (header, None); otherwise every string in
    the result is a bytes object.

    An email.errors.HeaderParseError is raised when a base64 encoded-word
    cannot be decoded.
    """
    # Fast path: nothing in the value looks like an encoded-word.
    if ecre.search(header) is None:
        return [(header, None)]

    # Phase 1: tokenize each physical line into (text, encoding, charset)
    # triples.  Plain text carries None for both encoding and charset.
    tokens = []
    for line in header.splitlines():
        fields = ecre.split(line)
        # ecre has three groups, so the split result is laid out as
        # [plain, charset, encoding, payload, plain, charset, ...].
        for start in range(0, len(fields), 4):
            plain = fields[start].strip()
            if plain:
                tokens.append((plain, None, None))
            if start + 1 < len(fields):
                cs, enc, payload = fields[start + 1:start + 4]
                tokens.append((payload, enc.lower(), cs.lower()))

    # Phase 2: undo the quoted-printable / base64 transfer encoding.
    decoded = []
    for payload, enc, cs in tokens:
        if enc is None:
            # Plain, unencoded text.
            text = payload
        elif enc == 'q':
            text = email.quoprimime.header_decode(payload)
        elif enc == 'b':
            # Postel's law: add missing padding
            missing = len(payload) % 4
            if missing:
                payload += '==='[:4 - missing]
            try:
                text = email.base64mime.decode(payload)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
        else:
            raise AssertionError('Unexpected encoding: ' + enc)
        decoded.append((text, cs))

    # Phase 3: convert everything to bytes and merge neighbouring runs that
    # share a charset.  Unencoded neighbours are joined with a single space;
    # encoded neighbours are concatenated directly.
    merged = []
    run_bytes = run_charset = None
    for text, cs in decoded:
        if isinstance(text, str):
            text = bytes(text, 'raw-unicode-escape')
        if run_bytes is None:
            run_bytes, run_charset = text, cs
        elif cs != run_charset:
            merged.append((run_bytes, run_charset))
            run_bytes, run_charset = text, cs
        elif run_charset is None:
            run_bytes += BSPACE + text
        else:
            run_bytes += text
    merged.append((run_bytes, run_charset))
    return merged

def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Build a Header from (decoded_string, charset) pairs.

    decoded_seq is a sequence of pairs in the format returned by
    decode_header(): each pair holds a decoded string and either None, a
    character set name, or a Charset instance.

    maxlinelen, header_name and continuation_ws are handed to the Header
    constructor unchanged.  Returns the populated Header instance.
    """
    header = Header(maxlinelen=maxlinelen, header_name=header_name,
                    continuation_ws=continuation_ws)
    for text, charset in decoded_seq:
        # None is passed straight through to Header.append(); only bare
        # charset names need wrapping in a Charset instance.
        if not (charset is None or isinstance(charset, Charset)):
            charset = Charset(charset)
        header.append(text, charset)
    return header

class Header:
    """A header value built from (string, Charset) chunks.

    Chunks are added with append(); str() gives the unencoded value and
    encode() gives the folded, RFC 2047 encoded value.
    """

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen.
        For splitting the first line to a shorter value (to account for the
        field header which isn't included in s, e.g. `Subject') pass in the
        name of the field in header_name.  The default maxlinelen is 78 as
        recommended by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            nextcs = charset
            if uchunks:
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii'):
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii'):
                    uchunks.append(SPACE)
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def __ne__(self, other):
        return not self == other

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset.  If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            s = s.decode(input_charset, errors)
        # Ensure that the text we're storing can be encoded to the output
        # character set, otherwise an early error is thrown.  The encoded
        # result itself is discarded; only the validation matters here.
        output_charset = charset.output_codec or 'us-ascii'
        s.encode(output_charset, errors)
        self._chunks.append((s, charset))

    def encode(self, splitchars=';, \t', maxlinelen=None):
        """Encode the header into a folded, RFC 2047 style string.

        Each stored chunk is fed to a _ValueFormatter using the chunk's own
        charset, and the formatter's lines are joined into the result.

        Optional splitchars is a string containing characters to split long
        ASCII lines on, in rough support of RFC 2822's `highest level
        syntactic breaks'.  It is only consulted for charsets that have no
        header encoding.

        Optional maxlinelen overrides the value given to the constructor for
        this call.  A value of 0 means don't wrap.

        Raises HeaderParseError if the encoded value contains a newline that
        is followed by something that looks like a header field name.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        for string, charset in self._chunks:
            lines = string.splitlines()
            # ''.splitlines() is [], so an empty chunk has no first line;
            # feed an empty string rather than failing on lines[0].
            first_line = lines[0] if lines else ''
            formatter.feed(first_line, charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    # Encoded continuation lines get the continuation
                    # whitespace fed separately as plain us-ascii.
                    formatter.feed(self._continuation_ws, USASCII)
                    line = ' ' + line.lstrip()
                formatter.feed(line, charset)
            if len(lines) > 1:
                formatter.newline()
            formatter.add_transition()
        value = str(formatter)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        # Normalize the chunks so that all runs of identical charsets get
        # collapsed into a single unicode string, joined by single spaces.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks

class _ValueFormatter:
    """Collect encoded header text and fold it into a list of lines.

    Finished lines are kept in self._lines; the line under construction is
    an _Accumulator held in self._current_line.  str() of the formatter
    flushes the current line and joins all lines with newlines.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        # A tab in the continuation whitespace is counted as eight columns.
        expanded_ws = continuation_ws.replace('\t', SPACE8)
        self._continuation_ws_len = len(expanded_ws)
        self._splitchars = splitchars
        self._lines = []
        # headerlen seeds the length of the first line only.
        self._current_line = _Accumulator(headerlen)

    def __str__(self):
        self.newline()
        return NL.join(self._lines)

    def newline(self):
        """Flush the current line (if it has any length) and start a new one."""
        current = self._current_line
        # Peek at the last item: a trailing None (a transition marker, or an
        # empty accumulator) is dropped, anything else is put straight back.
        tail = current.pop()
        if tail is not None:
            current.push(tail)
        if len(current) > 0:
            self._lines.append(str(current))
            current.reset()

    def add_transition(self):
        """Mark a chunk boundary on the current line with a None item."""
        self._current_line.push(None)

    def feed(self, string, charset):
        """Add string, encoded per charset, folding onto new lines as needed."""
        current = self._current_line
        # Easy case: the whole encoded string fits on the current line.
        encoded_string = charset.header_encode(string)
        if len(encoded_string) + len(current) <= self._maxlen:
            current.push(encoded_string)
            return
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible.  We don't have a lot of smarts about field syntax; the
        # split characters are simply tried in the order given.
        if charset.header_encoding is None:
            split_on = next(
                (c for c in self._splitchars if c in string), None)
            if split_on is None:
                # No split character is available, so the string has to go
                # on a line by itself whether or not it fits.
                if not current.is_onlyws():
                    self._lines.append(str(current))
                    current.reset(self._continuation_ws)
                current.push(encoded_string)
            else:
                self._ascii_split(string, split_on)
            return
        # Otherwise we're doing a Base64 or quoted-printable encoding, which
        # doesn't need syntactic breaks: the charset produces a list of
        # encoded lines sized according to _maxlengths().
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            current.push(first_line)
        self._lines.append(str(current))
        current.reset(self._continuation_ws)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        # The last encoded line stays open as the new current line ...
        current.push(last_line)
        # ... and everything in between is a full line in itself.
        self._lines.extend(
            self._continuation_ws + line for line in encoded_lines)

    def _maxlengths(self):
        # The first value is the room left on the current line; every value
        # after that is the room on a fresh continuation line.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, string, ch):
        current = self._current_line
        maxlen = self._maxlen
        pending = _Accumulator()
        # Split the line on the split character, preserving it.  If the split
        # character is whitespace RFC 2822 $2.2.3 requires us to fold on the
        # whitespace, so that the line leads with the original whitespace we
        # split on.  However, if a higher syntactic break is used instead
        # (e.g. comma or semicolon), the folding should happen after the split
        # character.  But then in that case, we need to add our own
        # continuation whitespace -- although won't that break unfolding?
        for part, splitpart, nextpart in _spliterator(ch, string):
            if not splitpart:
                # No splitpart means this is the last chunk.  Put this part
                # either on the current line or the next line depending on
                # whether it fits.
                pending.push(part)
                if len(pending) + len(current) <= maxlen:
                    # It fits, and we're done.
                    current.push(str(pending))
                else:
                    # It doesn't fit, but we're done.  Before pushing a new
                    # line, watch out for the current line containing only
                    # whitespace.
                    pending.pop()
                    if current.is_onlyws() and pending.is_onlyws():
                        # Don't start a new line.
                        pending.push(part)
                        part = None
                    current.push(str(pending))
                    self._lines.append(str(current))
                    if part is None:
                        current.reset()
                    else:
                        pending.reset(part)
                        current.reset(str(pending))
                return
            elif not nextpart:
                # There must be some trailing split characters because we
                # found a split character but no next part.  In this case we
                # must treat the thing to fit as the part + splitpart because
                # if splitpart is whitespace it's not allowed to be the only
                # thing on the line, and if it's not whitespace we must split
                # after the syntactic break.  In either case, we're done.
                pending_prelen = len(pending)
                pending.push(part + splitpart)
                if len(pending) + len(current) <= maxlen:
                    current.push(str(pending))
                elif pending_prelen == 0:
                    # This is the only chunk left so it has to go on the
                    # current line.
                    current.push(str(pending))
                else:
                    save_part = pending.pop()
                    current.push(str(pending))
                    self._lines.append(str(current))
                    pending.reset(save_part)
                    current.reset(str(pending))
                return
            elif not part:
                # We're leading with a split character.  See if the splitpart
                # and nextpart fits on the current line.
                pending.push(splitpart + nextpart)
                pending_len = len(pending)
                # We know we're not leaving the nextpart on the stack.
                pending.pop()
                if pending_len + len(current) <= maxlen:
                    pending.push(splitpart)
                else:
                    # It doesn't fit.  Since there's no current part really
                    # the best we can do is start a new line and push the
                    # split part onto it.
                    current.push(str(pending))
                    pending.reset()
                    if len(current) > 0 and self._lines:
                        self._lines.append(str(current))
                        current.reset()
                    pending.push(splitpart)
            else:
                # All three parts are present.  First let's see if all three
                # parts will fit on the current line.  If so, we don't need to
                # split it.
                pending.push(part + splitpart + nextpart)
                pending_len = len(pending)
                # Pop the part because we'll push nextpart on the next
                # iteration through the loop.
                pending.pop()
                if pending_len + len(current) <= maxlen:
                    pending.push(part + splitpart)
                else:
                    # The entire thing doesn't fit.  See if we need to split
                    # before or after the split characters.
                    if splitpart.isspace():
                        # Split before whitespace.  Remember that the
                        # whitespace becomes the continuation whitespace of
                        # the next line so it goes to current_line not holding.
                        pending.push(part)
                        current.push(str(pending))
                        pending.reset()
                        self._lines.append(str(current))
                        current.reset(splitpart)
                    else:
                        # Split after non-whitespace.  The continuation
                        # whitespace comes from the instance variable.
                        pending.push(part + splitpart)
                        current.push(str(pending))
                        pending.reset()
                        self._lines.append(str(current))
                        if nextpart[0].isspace():
                            current.reset()
                        else:
                            current.reset(self._continuation_ws)
        # Get the last of the holding part
        current.push(str(pending))

def _spliterator(character, string):
    """Yield overlapping (part, splitpart, nextpart) triples from string.

    string is split on every occurrence of character, keeping the separator.
    Each triple holds a piece of text, the separator that follows it, and the
    piece after that separator; the same piece is then the `part' of the
    next triple.  The final triple is (last_piece, None, None).

    character is treated as a literal string, never as a regular expression,
    so split characters such as '.', '|' or '(' are safe.
    """
    # re.escape() keeps regex metacharacters in the split character from
    # being interpreted as pattern syntax.
    pieces = re.split('(%s)' % re.escape(character), string)
    # With one capturing group the result alternates text and separators:
    # [text, sep, text, sep, ..., text].
    for index in range(0, len(pieces), 2):
        if index + 2 < len(pieces):
            yield (pieces[index], pieces[index + 1], pieces[index + 2])
        else:
            yield (pieces[index], None, None)

555

556

557

class _Accumulator:

Guido van Rossum

2007-08-30 03:46:43 +0000

[diff] [blame]

558

def __init__(self, initial_size=0):

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

559

self._initial_size = initial_size

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

560

self._current = []

561

562

def push(self, string):

563

self._current.append(string)

564

565

def pop(self):

Barry Warsaw

2007-08-31 02:35:00 +0000

[diff] [blame]

566

if not self._current:

567

return None

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

568

return self._current.pop()

569

570

def __len__(self):

Barry Warsaw

2007-08-31 02:35:00 +0000

[diff] [blame]

571

return sum(((1 if string is None else len(string))

572

for string in self._current),

Guido van Rossum

2007-08-30 03:46:43 +0000

[diff] [blame]

573

self._initial_size)

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

574

575

def __str__(self):

Barry Warsaw

2007-08-31 02:35:00 +0000

[diff] [blame]

576

if self._current and self._current[-1] is None:

577

self._current.pop()

578

return EMPTYSTRING.join((' ' if string is None else string)

579

for string in self._current)

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

580

581

def reset(self, string=None):

582

self._current = []

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

583

self._initial_size = 0

584

if string is not None:

585

self.push(string)

Guido van Rossum