Blame - Lib/email/header.py - platform/external/python/cpython2

2007-08-30 01:15:14 +0000

[diff] [blame]

1

2

# Author: Ben Gertzfield, Barry Warsaw

3

# Contact: email-sig@python.org

4

5

"""Header encoding and decoding functionality."""

__all__ = [

'Header',

'decode_header',

'make_header',

]

import re

import binascii

import email.quoprimime

17

import email.base64mime

18

19

from email.errors import HeaderParseError

20

from email.charset import Charset

# Small string constants shared by the header formatting code below.
NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''

# Default maximum length of a folded header line (the value recommended by
# RFC 2822); used by Header when no explicit maxlinelen is given.
MAXLINELEN = 78

USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  (?=[ \t]|$)           # whitespace or the end of the string
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (octal 041)
# to tilde (octal 176), i.e. the printable non-space ASCII characters.
# For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Helpers
_max_append = email.quoprimime._max_append

def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (value, charset) pairs.

    If the header contains no RFC 2047 encoded word, the result is
    [(header, None)] with the header returned unchanged.  Otherwise every
    value is a bytes object: encoded words are base64 or quoted-printable
    decoded, unencoded text is stripped of surrounding whitespace, and any
    str result is converted to bytes with the raw-unicode-escape codec.
    Neighbouring parts that share a charset are merged into one pair;
    unencoded neighbours are joined with a single space.

    charset is None for unencoded parts, otherwise the lower-cased charset
    name taken from the encoded word.

    An email.errors.HeaderParseError is raised when a base64 payload cannot
    be decoded.
    """
    # Fast path: nothing in the value looks like an encoded word.
    if not ecre.search(header):
        return [(header, None)]

    # Pass 1: tokenize into (payload, encoding, charset) triples.  Plain
    # text gets None for the last two members.  Because ecre has three
    # groups, split() returns [text, charset, encoding, payload, text, ...],
    # i.e. a plain-text slot every fourth element.
    tokens = []
    for physical_line in header.splitlines():
        fields = ecre.split(physical_line)
        for start in range(0, len(fields), 4):
            plain = fields[start].strip()
            if plain:
                tokens.append((plain, None, None))
            if start + 1 < len(fields):
                cs_name, enc_name, payload = fields[start + 1:start + 4]
                tokens.append((payload, enc_name.lower(), cs_name.lower()))

    # Pass 2: undo the base64 / quoted-printable transformation, producing
    # (decoded_value, charset) pairs.
    decoded = []
    for payload, enc_name, cs_name in tokens:
        if enc_name is None:
            # Plain text passes through untouched.
            decoded.append((payload, cs_name))
            continue
        if enc_name == 'q':
            value = email.quoprimime.header_decode(payload)
        elif enc_name == 'b':
            # Postel's law: add missing padding
            shortfall = len(payload) % 4
            if shortfall:
                payload += '==='[:4 - shortfall]
            try:
                value = email.base64mime.decode(payload)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
        else:
            raise AssertionError('Unexpected encoding: ' + enc_name)
        decoded.append((value, cs_name))

    # Pass 3: convert everything to bytes and merge consecutive runs that
    # carry the same charset.
    merged = []
    run_value = run_charset = None
    for value, cs_name in decoded:
        if isinstance(value, str):
            value = bytes(value, 'raw-unicode-escape')
        if run_value is None:
            run_value, run_charset = value, cs_name
        elif cs_name != run_charset:
            merged.append((run_value, run_charset))
            run_value, run_charset = value, cs_name
        elif run_charset is None:
            # Unencoded words were separated by whitespace originally.
            run_value += BSPACE + value
        else:
            run_value += value
    merged.append((run_value, run_charset))
    return merged

def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Build a Header from (value, charset) pairs as returned by decode_header().

    decoded_seq is an iterable of two-item pairs.  Each value is appended to
    a new Header in order.  A charset that is neither None nor already a
    Charset instance is wrapped in Charset() first; None is passed through
    so that Header.append() applies its own default.

    Optional maxlinelen, header_name, and continuation_ws are handed to the
    Header constructor unchanged.
    """
    header = Header(maxlinelen=maxlinelen, header_name=header_name,
                    continuation_ws=continuation_ws)
    for value, charset in decoded_seq:
        is_bare_name = charset is not None and not isinstance(charset, Charset)
        header.append(value, Charset(charset) if is_bare_name else charset)
    return header

class Header:
    """A header value stored as a list of (text, Charset) chunks."""

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        s is the optional initial value; when it is not None it is added
        through .append(), so it may be a byte string or a Unicode string
        (see .append() for the semantics).

        charset is used for s and is remembered as the default for later
        .append() calls that omit a charset.  It may be a Charset instance
        or a charset name; when omitted, us-ascii is used.

        maxlinelen is the maximum line length used when encoding; it
        defaults to MAXLINELEN (78, as recommended by RFC 2822).  To leave
        room on the first line for the field name, which is not part of s
        (e.g. Subject), pass that name as header_name.

        continuation_ws must be RFC 2822 compliant folding whitespace
        (usually a space or a hard tab); it is prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        self._maxlinelen = MAXLINELEN if maxlinelen is None else maxlinelen
        # Take the separating colon and space into account.
        self._headerlen = 0 if header_name is None else len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        ascii_like = (None, 'us-ascii')
        pieces = []
        previous = None
        for text, charset in self._chunks:
            current = charset
            # Spaces between encoded and non-encoded words must survive, so
            # from the second chunk on a space is inserted whenever we cross
            # between a real charset and None/us-ascii, in either direction.
            if pieces:
                was_ascii = previous in ascii_like
                is_ascii = current in ascii_like
                if was_ascii != is_ascii:
                    pieces.append(SPACE)
                    if is_ascii:
                        current = None
            previous = current
            pieces.append(text)
        return EMPTYSTRING.join(pieces)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        # other may be a Header or a string.  Either way, turn ourselves
        # into the plain (unencoded) string value and let other compare
        # itself against that.
        return other == str(self)

    def __ne__(self, other):
        return not (self == other)

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        charset may be a Charset instance or a charset name (which is
        converted to a Charset instance).  None, the default, selects the
        charset given to the constructor.

        s may be a byte string or a Unicode string.  A byte string (anything
        for which isinstance(s, str) is false) is decoded with the charset's
        input codec, falling back to us-ascii when the charset has none; a
        UnicodeError is raised if that fails.  For a Unicode string the
        charset is a hint describing the characters it contains.

        In both cases the text must be encodable with the charset's output
        codec (again falling back to us-ascii); this is checked here so that
        a UnicodeError surfaces at append time.

        errors is passed as the errors argument to both the decode and the
        encode call.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            s = s.decode(charset.input_codec or 'us-ascii', errors)
        # Encode purely as a validity check; the result is discarded and the
        # text itself is what gets stored.
        s.encode(charset.output_codec or 'us-ascii', errors)
        self._chunks.append((s, charset))

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        """Encode a message header into an RFC-compliant format.

        Each stored chunk is fed, one physical line at a time, to a
        _ValueFormatter which applies the chunk's charset header encoding
        and wraps the result to the maximum line length.

        splitchars is a string of characters on which long ASCII lines may
        be split, in rough support of RFC 2822's "highest level syntactic
        breaks".  It does not affect RFC 2047 encoded lines.

        maxlinelen overrides the value given to the constructor.  A value
        of 0 means do not wrap.

        linesep is the string placed between the lines of the result.  The
        default suits typical Python applications; pass CRLF to get
        RFC-compliant line separators.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        for text, charset in self._chunks:
            physical_lines = text.splitlines()
            multiline = len(physical_lines) > 1
            for physical_line in physical_lines:
                formatter.feed(physical_line, charset)
                if multiline:
                    formatter.newline()
            formatter.add_transition()
        return formatter._str(linesep)

    def _normalize(self):
        # Collapse each run of consecutive chunks that share a charset into
        # a single chunk whose text is the run joined with spaces.
        merged = []
        run_charset = None
        run_texts = []
        for text, charset in self._chunks:
            if charset == run_charset:
                run_texts.append(text)
                continue
            if run_charset is not None:
                merged.append((SPACE.join(run_texts), run_charset))
            run_texts = [text]
            run_charset = charset
        if run_texts:
            merged.append((SPACE.join(run_texts), run_charset))
        self._chunks = merged

class _ValueFormatter:
    """Accumulate header text into output lines no longer than maxlen."""

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        # For length accounting a tab counts as eight columns.
        self._continuation_ws_len = len(continuation_ws.replace('\t', SPACE8))
        self._splitchars = splitchars
        self._lines = []
        # The first line starts out with headerlen columns already used.
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        # Flush whatever is still pending, then join the finished lines.
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        return self._str(NL)

    def newline(self):
        line = self._current_line
        # Drop a trailing None (the marker pushed by add_transition); any
        # other item that was popped is put straight back.
        tail = line.pop()
        if tail is not None:
            line.push(tail)
        if len(line) > 0:
            self._lines.append(str(line))
        line.reset()

    def add_transition(self):
        self._current_line.push(None)

    def feed(self, string, charset):
        line = self._current_line
        # If the string fits on the current line in its encoded form, add it
        # and be done.
        encoded = charset.header_encode(string)
        if len(encoded) + len(line) <= self._maxlen:
            line.push(encoded)
            return
        # A charset without a header encoding (i.e. an ASCII encoding) has
        # to be split at the "highest level syntactic break" possible.  There
        # are no smarts about field syntax here; the split characters are
        # simply tried in the order given (by default semi-colon, comma,
        # then whitespace).  Eventually, this should be pluggable.
        if charset.header_encoding is None:
            splitter = next(
                (ch for ch in self._splitchars if ch in string), None)
            if splitter is not None:
                self._ascii_split(string, splitter)
                return
            # No split character is available, so regardless of whether the
            # string fits, it has to go on a line by itself.
            if not line.is_onlyws():
                self._lines.append(str(line))
                line.reset(self._continuation_ws)
            line.push(encoded)
            return
        # Otherwise this is Base64 or quoted-printable, which needs no
        # syntactic breaks: just fit as many characters as possible on the
        # current line, minus the RFC 2047 chrome.  The tricky part is that
        # lengths are measured in octets while it is only safe to split at
        # character boundaries, so at best we can only get close.
        chunks = charset.header_encode_lines(string, self._maxlengths())
        if not chunks:
            # There are no encoded lines, so we're done.
            return
        first, remaining = chunks[0], chunks[1:]
        # The first element extends the current line; None means nothing
        # more fit there, so the current line is simply closed.
        if first is not None:
            line.push(first)
        self._lines.append(str(line))
        line.reset(self._continuation_ws)
        if not remaining:
            # There was only one line.
            return
        # The last element stays open on the current line ...
        line.push(remaining[-1])
        # ... and everything in between are full lines in themselves.
        for full_line in remaining[:-1]:
            self._lines.append(self._continuation_ws + full_line)

    def _maxlengths(self):
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        # Every later line only loses the continuation whitespace.
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, string, ch):
        # self._current_line is only ever reset in place, never rebound, so
        # a local alias stays valid for the whole call.
        line = self._current_line
        limit = self._maxlen
        pending = _Accumulator()
        # Split on the split character, preserving it.  If the split
        # character is whitespace, RFC 2822 $2.2.3 requires folding on the
        # whitespace so the next line leads with the original whitespace.
        # For a higher syntactic break (e.g. comma or semicolon) the fold
        # happens after the split character and our own continuation
        # whitespace is added -- although won't that break unfolding?
        for part, splitpart, nextpart in _spliterator(ch, string):
            if not splitpart:
                # No splitpart means this is the last chunk.  It goes on the
                # current line or the next one depending on whether it fits.
                pending.push(part)
                if len(pending) + len(line) <= limit:
                    # It fits, and we're done.
                    line.push(str(pending))
                    return
                # It doesn't fit.  Before starting a new line, watch out for
                # the current line containing only whitespace.
                pending.pop()
                if line.is_onlyws() and pending.is_onlyws():
                    # Don't start a new line.
                    pending.push(part)
                    part = None
                line.push(str(pending))
                self._lines.append(str(line))
                if part is None:
                    line.reset()
                else:
                    pending.reset(part)
                    line.reset(str(pending))
                return
            if not nextpart:
                # A split character with nothing after it: trailing split
                # characters.  The unit to fit is part + splitpart, because
                # whitespace may not be alone on a line and a non-whitespace
                # break must be split after.  Either way we're done.
                already_pending = len(pending)
                pending.push(part + splitpart)
                fits = len(pending) + len(line) <= limit
                if fits or already_pending == 0:
                    # Either it fits, or it is the only chunk left and so
                    # has to go on the current line anyway.
                    line.push(str(pending))
                else:
                    tail = pending.pop()
                    line.push(str(pending))
                    self._lines.append(str(line))
                    pending.reset(tail)
                    line.reset(str(pending))
                return
            if not part:
                # Leading with a split character.  Measure whether splitpart
                # plus nextpart would fit; nextpart itself is never left on
                # the stack.
                pending.push(splitpart + nextpart)
                needed = len(pending)
                pending.pop()
                if needed + len(line) > limit:
                    # It doesn't fit.  With no real current part, the best
                    # option is to start a new line for the split part.
                    line.push(str(pending))
                    pending.reset()
                    if len(line) > 0 and self._lines:
                        self._lines.append(str(line))
                        line.reset()
                pending.push(splitpart)
                continue
            # All three parts are present.  If they all fit on the current
            # line there is no need to split.  nextpart is only measured, not
            # kept: it comes round again as part on the next iteration.
            pending.push(part + splitpart + nextpart)
            needed = len(pending)
            pending.pop()
            if needed + len(line) <= limit:
                pending.push(part + splitpart)
                continue
            # The entire thing doesn't fit; split before or after the split
            # character depending on its kind.
            if splitpart.isspace():
                # Split before whitespace.  The whitespace becomes the
                # continuation whitespace of the next line, so it goes to
                # the current line, not to pending.
                pending.push(part)
                line.push(str(pending))
                pending.reset()
                self._lines.append(str(line))
                line.reset(splitpart)
            else:
                # Split after non-whitespace.  The continuation whitespace
                # comes from the instance variable, unless the next part
                # already starts with whitespace.
                pending.push(part + splitpart)
                line.push(str(pending))
                pending.reset()
                self._lines.append(str(line))
                if nextpart[0].isspace():
                    line.reset()
                else:
                    line.reset(self._continuation_ws)
        # Get the last of the pending part
        line.push(str(pending))

def _spliterator(character, string):
    """Yield (part, splitpart, nextpart) triples for string split on character.

    The string is split on every occurrence of the literal character, which
    is kept.  For text pieces t0, t1, ..., tn this yields
    (t0, character, t1), (t1, character, t2), ... and finally
    (tn, None, None), so each piece except the first shows up once as
    nextpart and then again as part.  A string without the character yields
    just (string, None, None).  Pieces may be empty strings when the
    character leads, trails or repeats.

    character is escaped before being used in the regular expression, so
    split characters that are regex metacharacters (".", "|", "(", ...) are
    matched literally.
    """
    # With one capturing group, re.split() alternates text and separator and
    # always starts and ends with a text piece: [t0, sep, t1, sep, ..., tn].
    pieces = re.split('(%s)' % re.escape(character), string)
    for index in range(0, len(pieces) - 1, 2):
        yield (pieces[index], pieces[index + 1], pieces[index + 2])
    yield (pieces[-1], None, None)

550

551

552

class _Accumulator:

Guido van Rossum

2007-08-30 03:46:43 +0000

[diff] [blame]

553

def __init__(self, initial_size=0):

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

554

self._initial_size = initial_size

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

555

self._current = []

556

557

def push(self, string):

558

self._current.append(string)

559

560

def pop(self):

Barry Warsaw

2007-08-31 02:35:00 +0000

[diff] [blame]

561

if not self._current:

562

return None

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

563

return self._current.pop()

564

565

def __len__(self):

Barry Warsaw

2007-08-31 02:35:00 +0000

[diff] [blame]

566

return sum(((1 if string is None else len(string))

567

for string in self._current),

Guido van Rossum

2007-08-30 03:46:43 +0000

[diff] [blame]

568

self._initial_size)

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

569

570

def __str__(self):

Barry Warsaw

2007-08-31 02:35:00 +0000

[diff] [blame]

571

if self._current and self._current[-1] is None:

572

self._current.pop()

573

return EMPTYSTRING.join((' ' if string is None else string)

574

for string in self._current)

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

575

576

def reset(self, string=None):

577

self._current = []

Guido van Rossum

2007-08-30 01:15:14 +0000

[diff] [blame]

578

self._initial_size = 0

579

if string is not None:

580

self.push(string)

Guido van Rossum