Blame - Lib/textwrap.py - platform/external/python/cpython3

2002-06-07 21:43:37 +0000

[diff] [blame]

1

"""

2

Utilities for wrapping text strings and filling text paragraphs.

3

"""

4

Greg Ward

698d9f0

2002-06-07 22:40:23 +0000

[diff] [blame]

5

6

7

# Written by Greg Ward <gward@python.net>

8

Greg Ward

2002-06-07 21:43:37 +0000

[diff] [blame]

9

__revision__ = "$Id$"

import string, re

Greg Ward

2002-06-07 21:43:37 +0000

[diff] [blame]

13

class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
    """

    # Translation table mapping every whitespace character to a space.
    # Prefer string.maketrans() where the string module still provides
    # it and fall back to str.maketrans() otherwise, so the class body
    # can be executed on either kind of interpreter.
    _maketrans = getattr(string, 'maketrans', None) or str.maketrans
    whitespace_trans = _maketrans(string.whitespace,
                                  ' ' * len(string.whitespace))
    del _maketrans

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(r'(\s+|'                  # any whitespace
                            r'\w{2,}-(?=\w{2,})|'     # hyphenated words
                            r'(?<=\w)-{2,}(?=\w))')   # em-dash

    # Matches a chunk that *ends* a sentence.  The \Z anchor matters:
    # the pattern is applied with search(), and without the anchor a
    # chunk such as "example.com" would match on its embedded "e.".
    # Only ASCII lowercase letters are recognised.
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z'                # end of chunk
                                 % string.ascii_lowercase)

    def __init__(self,
                 width=70,
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True):
        self.width = width
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        r"""_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs (if 'expand_tabs' is set)
        and convert all other whitespace characters to spaces (if
        'replace_whitespace' is set).  Eg. with both options on,
        " foo\tbar\n\nbaz" becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            text = text.translate(self.whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        """
        # re.split() with a capturing group keeps the separators but
        # also yields empty strings between adjacent separators; drop
        # those.  A real list is required because _wrap_chunks()
        # indexes into and mutates the result.
        return [chunk for chunk in self.wordsep_re.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        r"""_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\nBar ...", _munge_whitespace()
        and _split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place; nothing is returned.
        """
        i = 0
        pat = self.sentence_end_re
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and pat.search(chunks[i]):
                chunks[i + 1] = "  "
                # Skip the space chunk we just widened.
                i += 2
            else:
                i += 1

    def _handle_long_word(self, chunks, cur_line, cur_len):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  'chunks[0]' is the offending
        chunk, 'cur_line' the chunks already placed on the current line
        and 'cur_len' the number of columns already used on that line.
        Both lists are modified in place; nothing is returned.
        """
        space_left = self.width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            if space_left < 1 and not cur_line:
                # Not even an empty line has room (width is too small).
                # Emit one character anyway so the caller's loop is
                # guaranteed to make progress instead of spinning.
                space_left = 1
            if space_left > 0:
                head = chunks[0][:space_left]
                tail = chunks[0][space_left:]
                cur_line.append(head)
                if tail:
                    chunks[0] = tail
                else:
                    del chunks[0]
            # When space_left is 0 and the line already has text, add
            # nothing: appending an empty string would hide a trailing
            # whitespace chunk from the caller's end-of-line stripping.

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(chunks.pop(0))

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # the line will be empty, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks, initial_tab="", subsequent_tab=""):
        """_wrap_chunks(chunks : [string],
                        initial_tab : string = "",
                        subsequent_tab : string = "")
           -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        'initial_tab' and 'subsequent_tab' are the prefixes the caller
        intends to put before the first and the following lines; only
        their lengths are used here, to reserve room on each line.  The
        returned lines do not include the prefixes.

        The 'chunks' list is consumed (emptied) in the process.
        """
        lines = []
        width = self.width

        while chunks:
            cur_line = []               # list of chunks (to-be-joined)

            # Columns already taken by the prefix of this line.  The
            # first line is the one produced while 'lines' is empty.
            if lines:
                indent_len = len(subsequent_tab)
            else:
                indent_len = len(initial_tab)
            cur_len = indent_len        # length of current line

            # First chunk on line is whitespace -- drop it.
            if chunks[0].strip() == '':
                del chunks[0]

            while chunks:
                chunk_len = len(chunks[0])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop(0))
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[0]) > width - indent_len:
                self._handle_long_word(chunks, cur_line, cur_len)

            # If the last chunk on this line is all whitespace, drop it.
            if cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(''.join(cur_line))

        return lines

    def _wrap_text(self, text, initial_tab, subsequent_tab):
        """_wrap_text(text : string,
                      initial_tab : string,
                      subsequent_tab : string)
           -> [string]

        Shared implementation of wrap() and fill(): munge whitespace,
        split into chunks, optionally fix sentence endings and wrap,
        reserving room for the given line prefixes.  The returned lines
        do not include the prefixes.
        """
        text = self._munge_whitespace(text)

        # Short-cut for text that already fits on the first line.  It is
        # skipped when sentence endings must be fixed, because that
        # fix-up happens on the chunk list built below.
        if (not self.fix_sentence_endings
                and len(initial_tab) + len(text) <= self.width):
            return [text]

        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks, initial_tab, subsequent_tab)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Split 'text' into multiple lines of no more than 'self.width'
        characters each, and return the list of strings that results.
        Depending on 'expand_tabs' and 'replace_whitespace', tabs in
        'text' are expanded and all other whitespace characters
        (including newline) are converted to space first.
        """
        return self._wrap_text(text, "", "")

    def fill(self, text, initial_tab="", subsequent_tab=""):
        """fill(text : string,
                initial_tab : string = "",
                subsequent_tab : string = "")
           -> string

        Reformat the paragraph in 'text' to fit in lines of no more than
        'width' columns.  The first line is prefixed with 'initial_tab',
        and subsequent lines are prefixed with 'subsequent_tab'; the
        lengths of the tab strings are accounted for when wrapping lines
        to fit in 'width' columns.
        """
        lines = self._wrap_text(text, initial_tab, subsequent_tab)
        sep = "\n" + subsequent_tab
        return initial_tab + sep.join(lines)

243

244

245

# Convenience interface

246

Greg Ward

2002-06-07 22:32:15 +0000

[diff] [blame]

247

def wrap(text, width):
    """wrap(text : string, width : int) -> [string]

    Convenience function: build a throwaway TextWrapper limited to
    'width' columns (every other option left at its default) and
    return the list of lines it produces for 'text'.
    """
    wrapper = TextWrapper(width=width)
    return wrapper.wrap(text)

Greg Ward

2002-06-07 21:43:37 +0000

[diff] [blame]

249

Greg Ward

2002-06-07 22:32:15 +0000

[diff] [blame]

250

def fill(text, width, initial_tab="", subsequent_tab=""):

Greg Ward