blob: 3ad3e18ea8998d928a14ea7efb935bdb62af6741 [file] [log] [blame]
"""Text wrapping and filling.
"""

# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>
7
Benjamin Peterson274271d2011-06-28 10:25:04 -05008import re
Greg Ward00935822002-06-07 21:43:37 +00009
# Names exported by ``from textwrap import *``.
__all__ = [
    'TextWrapper',
    'wrap',
    'fill',
    'dedent',
    'indent',
    'shorten',
]
Greg Ward4c6c9c42003-02-03 14:46:57 +000011
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 0 .. 'tabsize' spaces, depending on its
        position in its line.  If false, each tab is treated as a
        single character.
      tabsize (default: 8)
        Expand tabs in input text to 0 .. 'tabsize' spaces, unless
        'expand_tabs' is false.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      max_lines (default: None)
        Truncate wrapped lines.
      placeholder (default: ' [...]')
        Append to the last line of truncated text.
    """

    # Translation table mapping every recognized whitespace code point to
    # a plain space.  Built with dict.fromkeys() so that no loop variable
    # is left behind as an accidental class attribute.
    uspace = ord(' ')
    unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), uspace)

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    word_punct = r'[\w!"\'&.,?]'
    letter = r'[^\d\W]'
    wordsep_re = re.compile(r'''
        ( # any whitespace
          \s+
        | # em-dash between words
          (?<=%(wp)s) -{2,} (?=\w)
        | # word, possibly hyphenated
          \S+? (?:
            # hyphenated word
              -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
              (?= %(lt)s -? %(lt)s)
            | # end of word
              (?=\s|\Z)
            | # em-dash
              (?<=%(wp)s) (?=-{2,}\w)
            )
        )''' % {'wp': word_punct, 'lt': letter}, re.VERBOSE)
    # The two fragments are only needed to build the pattern above.
    del word_punct, letter

    # This less funky little regex just splits on recognized spaces.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(\s+)')

    # XXX this is not locale- or charset-aware -- the character class
    # below is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True,
                 tabsize=8,
                 *,
                 max_lines=None,
                 placeholder=' [...]'):
        """Store the wrapping options as instance attributes.

        No validation happens here; 'width', 'max_lines' and
        'placeholder' are checked when _wrap_chunks() runs.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
        self.tabsize = tabsize
        self.max_lines = max_lines
        self.placeholder = placeholder

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. with the default settings
        "foo\\tbar\\n\\nbaz" becomes "foo     bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs(self.tabsize)
        if self.replace_whitespace:
            text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        # NOTE: this is an identity test, so only the literal True selects
        # the hyphen-aware splitter; any other value uses the simple one.
        if self.break_on_hyphens is True:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        # re.split() with a capturing group yields empty strings between
        # adjacent matches; drop them.
        return [c for c in chunks if c]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and patsearch(chunks[i]):
                # Widen the single separating space to two spaces.
                chunks[i + 1] = "  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  'reversed_chunks' and 'cur_line'
        are modified in place; nothing is returned.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        The 'chunks' list is consumed (reversed and popped) in place.
        Raises ValueError if 'self.width' is not positive, or if
        'max_lines' is set and the placeholder cannot fit on a line.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            # The placeholder lands on the last permitted line, so check
            # it against the indent that line will carry.
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if (self.drop_whitespace and cur_line
                    and cur_line[-1].strip() == ''):
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + ''.join(cur_line))
                else:
                    # This is the last permitted line and text remains:
                    # drop trailing chunks until the placeholder fits
                    # after a non-whitespace chunk.
                    while cur_line:
                        if (cur_line[-1].strip() and
                                cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing on this line could host the placeholder;
                        # try the previous line, else emit the placeholder
                        # on a line of its own.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split_chunks(self, text):
        """Normalize whitespace in 'text', then split it into chunks."""
        text = self._munge_whitespace(text)
        return self._split(text)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        chunks = self._split_chunks(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))
Greg Ward00935822002-06-07 21:43:37 +0000365
366
# -- Convenience interface ---------------------------------------------
Greg Ward00935822002-06-07 21:43:37 +0000368
def wrap(text, width=70, **kwargs):
    """Break one paragraph into a list of lines at most 'width' wide.

    A TextWrapper is created from 'width' plus any extra keyword
    arguments, and its wrap() method is applied to 'text'.  With the
    default options tabs are expanded and every other whitespace
    character (newlines included) becomes a space.  See the TextWrapper
    class for the keyword arguments that customize wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).wrap(text)
Greg Ward00935822002-06-07 21:43:37 +0000381
def fill(text, width=70, **kwargs):
    """Re-flow one paragraph and return it as a single string.

    A TextWrapper is created from 'width' plus any extra keyword
    arguments, and its fill() method is applied to 'text', so the
    result is the wrapped lines joined with newlines.  As with wrap(),
    tabs are expanded and other whitespace characters are converted to
    spaces by default.  See the TextWrapper class for the keyword
    arguments that customize wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).fill(text)
Greg Ward478cd482003-05-08 01:58:05 +0000393
def shorten(text, width, **kwargs):
    """Collapse whitespace in 'text' and truncate it to fit in 'width'.

    Runs of whitespace are first reduced to single spaces.  If the
    result fits in *width* it is returned unchanged; otherwise as many
    leading words as fit are kept and the placeholder is appended::

        >>> textwrap.shorten("Hello  world!", width=12)
        'Hello world!'
        >>> textwrap.shorten("Hello  world!", width=11)
        'Hello [...]'
    """
    wrapper = TextWrapper(width=width, max_lines=1, **kwargs)
    collapsed = ' '.join(text.strip().split())
    return wrapper.fill(collapsed)
Antoine Pitrou389dec82013-08-12 22:39:09 +0200408
Greg Ward478cd482003-05-08 01:58:05 +0000409
# -- Loosely related functionality -------------------------------------
411
# Matches a line made up solely of spaces and/or tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the leading spaces/tabs of every line that has some other content.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.

    Lines consisting solely of spaces and tabs are normalized to empty
    lines and do not influence the computed margin.  Returns the
    dedented text; input without a common margin is returned with only
    that normalization applied.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    for line_indent in _leading_whitespace_re.findall(text):
        if margin is None:
            margin = line_indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif line_indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(line_indent):
            margin = line_indent

        # Neither is a prefix of the other: shrink the margin to the
        # longest prefix they share (possibly empty) instead of giving
        # up on a margin altogether.
        else:
            for i, (x, y) in enumerate(zip(margin, line_indent)):
                if x != y:
                    margin = margin[:i]
                    break

    if margin:
        # margin contains only spaces and tabs, so it is safe to embed
        # in the pattern without escaping.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
462
Nick Coghlan4fae8cd2012-06-11 23:07:51 +1000463
def indent(text, prefix, predicate=None):
    """Return 'text' with 'prefix' prepended to selected lines.

    A line is selected when 'predicate(line)' is true.  When no
    'predicate' is given, the lines selected are those that are
    non-empty and not made up solely of whitespace characters.  Line
    endings are preserved.
    """
    if predicate is None:
        predicate = lambda line: line.strip()

    return ''.join(
        prefix + line if predicate(line) else line
        for line in text.splitlines(True)
    )
480
481
if __name__ == "__main__":
    # Tiny manual demo: dedent a two-line sample and print the result.
    sample = "Hello there.\n This is indented."
    print(dedent(sample))