Blame - Lib/textwrap.py - platform/external/python/cpython3

2002-07-04 14:51:49 +0000

[diff] [blame]

1

"""Text wrapping and filling.

Greg Ward

2002-06-07 21:43:37 +0000

[diff] [blame]

2

"""

3

Greg Ward

78cc051

2002-10-13 19:23:18 +0000

[diff] [blame]

4

Greg Ward

523008c

2003-06-15 15:37:18 +0000

[diff] [blame]

5

Greg Ward

698d9f0

2002-06-07 22:40:23 +0000

[diff] [blame]

6

# Written by Greg Ward <gward@python.net>

7

Benjamin Peterson

274271d

2011-06-28 10:25:04 -0500

[diff] [blame]

8

import re

Greg Ward

2002-06-07 21:43:37 +0000

[diff] [blame]

9

Serhiy Storchaka

3e4b528

2013-10-16 13:07:53 +0300

[diff] [blame]

10

__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']

Greg Ward

4c6c9c4

2003-02-03 14:46:57 +0000

[diff] [blame]

11

Greg Ward

afd44de

2002-12-12 17:24:35 +0000

[diff] [blame]

12

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 0 .. 'tabsize' spaces, depending on its position
        in its line.  If false, each tab is treated as a single character.
      tabsize (default: 8)
        Expand tabs in input text to 0 .. 'tabsize' spaces, unless
        'expand_tabs' is false.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      max_lines (default: None)
        Truncate wrapped lines.
      placeholder (default: ' [...]')
        Append to the last line of truncated text.
    """

    # Translation table mapping every recognized whitespace character
    # to a plain space; used by _munge_whitespace() via str.translate().
    uspace = ord(' ')
    unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), uspace)

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

    # This less funky little regex just split on recognized spaces. E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(\s+)')

    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True,
                 tabsize=8,
                 *,
                 max_lines=None,
                 placeholder=' [...]'):
        """Store the wrapping options; see the class docstring for each."""
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
        self.tabsize = tabsize
        self.max_lines = max_lines
        self.placeholder = placeholder

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs(self.tabsize)
        if self.replace_whitespace:
            text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        # NOTE: the identity test is deliberate legacy behaviour -- only
        # the exact value True selects the hyphen-aware splitter.
        if self.break_on_hyphens is True:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        # re.split() with a capturing group yields empty strings between
        # adjacent separators; discard them.
        chunks = [c for c in chunks if c]
        return chunks

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place; nothing is returned.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks)-1:
            if chunks[i+1] == " " and patsearch(chunks[i]):
                chunks[i+1] = "  "
                # Skip the separator we just widened.
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  'reversed_chunks' is the stack of
        remaining chunks (next chunk last); both it and 'cur_line' are
        modified in place.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Raises ValueError if 'self.width' is not positive, or if
        'max_lines' is set and the placeholder cannot fit in a line.
        Note that 'chunks' is reversed and consumed in place.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                l = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + l <= width:
                    cur_line.append(chunks.pop())
                    cur_len += l

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + ''.join(cur_line))
                else:
                    # This is the last permitted line and text remains:
                    # drop trailing chunks until the placeholder fits.
                    while cur_line:
                        if (cur_line[-1].strip() and
                            cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing on this line could stay; try to attach
                        # the placeholder to the previous line instead.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split_chunks(self, text):
        """Normalize whitespace in 'text' and split it into chunks."""
        text = self._munge_whitespace(text)
        return self._split(text)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        chunks = self._split_chunks(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))

Greg Ward

2002-06-07 21:43:37 +0000

[diff] [blame]

350

351

Greg Ward

2002-07-04 14:51:49 +0000

[diff] [blame]

352

# -- Convenience interface ---------------------------------------------

Greg Ward

2002-06-07 21:43:37 +0000

[diff] [blame]

353

Greg Ward

cf02ac6

2002-06-10 20:36:07 +0000

[diff] [blame]

354

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)

Greg Ward

2002-06-07 21:43:37 +0000

[diff] [blame]

366

Greg Ward

2002-06-10 21:37:12 +0000

[diff] [blame]

367

def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)

Greg Ward

2003-05-08 01:58:05 +0000

[diff] [blame]

378

Serhiy Storchaka

2013-10-15 21:22:54 +0300

[diff] [blame]

379

def shorten(text, width, **kwargs):
    """Collapse and truncate the given text to fit in the given width.

    The text first has its whitespace collapsed.  If it then fits in
    the *width*, it is returned as is.  Otherwise, as many words
    as possible are joined and then the placeholder is appended::

        >>> textwrap.shorten("Hello  world!", width=12)
        'Hello world!'
        >>> textwrap.shorten("Hello  world!", width=11)
        'Hello [...]'

    Extra keyword arguments are passed on to TextWrapper.
    """
    w = TextWrapper(width=width, max_lines=1, **kwargs)
    # Collapse every run of whitespace to a single space before wrapping.
    return w.fill(' '.join(text.strip().split()))

Antoine Pitrou

2013-08-12 22:39:09 +0200

[diff] [blame]

393

Greg Ward

2003-05-08 01:58:05 +0000

[diff] [blame]

394

395

# -- Loosely related functionality -------------------------------------

396

Thomas Wouters

2006-08-11 14:57:12 +0000

[diff] [blame]

397

# Lines consisting solely of spaces/tabs, and the leading space/tab run
# of every line that has other content; both are used by dedent().
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.  (This behaviour is
    new in Python 2.5; older versions of this module incorrectly
    expanded tabs before searching for common leading whitespace.)

    Lines that contain only spaces and tabs are emptied and do not take
    part in determining the common margin.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Find the largest common whitespace between current line and previous
        # winner.
        else:
            for i, (x, y) in enumerate(zip(margin, indent)):
                if x != y:
                    margin = margin[:i]
                    break
            else:
                margin = margin[:len(indent)]

    # The margin contains only spaces and tabs, so it is safe to splice
    # into the pattern without escaping.
    if margin:
        text = re.sub(r'(?m)^' + margin, '', text)
    return text

451

Nick Coghlan

4fae8cd

2012-06-11 23:07:51 +1000

[diff] [blame]

452

453

def indent(text, prefix, predicate=None):
    """Adds 'prefix' to the beginning of selected lines in 'text'.

    If 'predicate' is provided, 'prefix' will only be added to the lines
    where 'predicate(line)' is True. If 'predicate' is not provided,
    it will default to adding 'prefix' to all non-empty lines that do not
    consist solely of whitespace characters.
    """
    if predicate is None:
        def predicate(line):
            return line.strip()

    # splitlines(True) keeps the line endings, so joining the pieces
    # reproduces the original text apart from the inserted prefixes.
    return ''.join(prefix + line if predicate(line) else line
                   for line in text.splitlines(True))

469

470

Thomas Wouters