# blob: 62ea0b48e6c18955c08a43805067c072387fee61
"""Text wrapping and filling.
"""

# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>

# Version-control keyword placeholder.
__revision__ = "$Id$"

import re
import string

# Bind _unicode to the built-in unicode type when it exists.  The rest of
# the module uses isinstance(text, _unicode) to pick Unicode-aware code
# paths, so the name must always be defined.
try:
    _unicode = unicode
except NameError:
    # If Python is built without Unicode support, the unicode type
    # will not exist.  Fake one: nothing is ever an instance of this
    # placeholder class, so the Unicode branches are simply never taken.
    class _unicode(object):
        pass

# Do the right thing with boolean values for all known Python versions
# (so this module can be copied to projects that don't depend on Python
# 2.3, e.g. Optik and Docutils) by uncommenting the block of code below.
#try:
#    True, False
#except NameError:
#    (True, False) = (1, 0)

# Names exported by "from textwrap import *".
__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent']

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '

class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
    """

    # Translation table for byte strings: every character listed in
    # _whitespace maps to a plain space (used by _munge_whitespace()).
    whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))

    # Equivalent mapping for unicode.translate(), keyed by code point.
    unicode_whitespace_trans = {}
    uspace = ord(u' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

    # This less funky little regex just splits on recognized spaces.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(\s+)')

    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z'                # end of chunk
                                 % string.lowercase)


    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True):
        """Store the wrapping options (see the class docstring) on the
        instance and prepare the Unicode variants of the split regexes.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens

        # recompile the regexes for Unicode mode -- done in this clumsy way for
        # backwards compatibility because it's rather common to monkey-patch
        # the TextWrapper class' wordsep_re attribute.
        self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
        self.wordsep_simple_re_uni = re.compile(
            self.wordsep_simple_re.pattern, re.U)


    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\tbar\n\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            # Byte strings and unicode strings need different kinds of
            # translation table; any other type is left untouched.
            if isinstance(text, str):
                text = text.translate(self.whitespace_trans)
            elif isinstance(text, _unicode):
                text = text.translate(self.unicode_whitespace_trans)
        return text


    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        # Pick the regex matching both the string type and the
        # break_on_hyphens setting.
        if isinstance(text, _unicode):
            if self.break_on_hyphens:
                pat = self.wordsep_re_uni
            else:
                pat = self.wordsep_simple_re_uni
        else:
            if self.break_on_hyphens:
                pat = self.wordsep_re
            else:
                pat = self.wordsep_simple_re
        # Drop the empty strings that re.split() produces.  A list is
        # built explicitly because callers index the result, take its
        # len() and reverse it in place.
        return [chunk for chunk in pat.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place; nothing is returned.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks)-1:
            if chunks[i+1] == " " and patsearch(chunks[i]):
                # Widen the single space after a sentence end to two.
                chunks[i+1] = "  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(reversed_chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  'reversed_chunks' is the stack
        of remaining chunks (next chunk last); both it and 'cur_line'
        are modified in place.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Raises ValueError if 'self.width' is not positive.  The 'chunks'
        list is reversed in place and consumed.
        """
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        lines = []

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines


    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))


# -- Convenience interface ---------------------------------------------

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    # A fresh TextWrapper is built on every call; 'kwargs' is passed
    # straight through to its constructor.
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)

def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    # A fresh TextWrapper is built on every call; 'kwargs' is passed
    # straight through to its constructor.
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)


# -- Loosely related functionality -------------------------------------

# Matches lines consisting solely of spaces and tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the run of leading spaces/tabs on each line that has at least
# one non-whitespace character.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\thello" are
    considered to have no common leading whitespace.  (This behaviour is
    new in Python 2.5; older versions of this module incorrectly
    expanded tabs before searching for common leading whitespace.)

    Lines that contain only spaces and tabs are reduced to empty lines
    and do not take part in determining the common margin.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Current line and previous winner have no common whitespace:
        # there is no margin.
        else:
            margin = ""
            break

    # 'margin' holds only spaces and tabs, so it can be embedded in the
    # pattern without escaping.
    if margin:
        text = re.sub(r'(?m)^' + margin, '', text)
    return text

if __name__ == "__main__":
    # Tiny manual smoke test for dedent().  The parenthesised
    # single-argument form of print works both as a statement and as
    # a function call.
    #print(dedent("\tfoo\n\tbar"))
    #print(dedent(" \thello there\n \t how are you?"))
    print(dedent("Hello there.\n This is indented."))