Blame - Lib/textwrap.py - platform/external/python/cpython3

blob: ef6c372ce51301518282de14c62a0070173ac33d [file] [log] [blame]

Greg Ward	0093582	2002-06-07 21:43:37 +0000	[diff] [blame]	1	"""
				2	Utilities for wrapping text strings and filling text paragraphs.
				3	"""
				4
				5	__revision__ = "$Id$"
				6
				7	import string, re
				8
				9
				10	# XXX is this going to be implemented properly somewhere in 2.3?
				11	def islower (c):
				12	return c in string.lowercase
				13
				14
				15	class TextWrapper:
				16	"""
				17	Object for wrapping/filling text. The public interface consists of
				18	the wrap() and fill() methods; the other methods are just there for
				19	subclasses to override in order to tweak the default behaviour.
				20	If you want to completely replace the main wrapping algorithm,
				21	you'll probably have to override _wrap_chunks().
				22
				23	Several instance attributes control various aspects of
				24	wrapping:
Greg Ward	62e4f3b	2002-06-07 21:56:16 +0000	[diff] [blame^]	25	expand_tabs (default: true)
				26	Expand tabs in input text to spaces before further processing.
				27	Each tab will become 1 .. 8 spaces, depending on its position in
				28	its line. If false, each tab is treated as a single character.
				29	replace_whitespace (default: true)
				30	Replace all whitespace characters in the input text by spaces
				31	after tab expansion. Note that if expand_tabs is false and
				32	replace_whitespace is true, every tab will be converted to a
				33	single space!
				34	fix_sentence_endings (default: false)
				35	Ensure that sentence-ending punctuation is always followed
				36	by two spaces. Off by default becaus the algorithm is
				37	(unavoidably) imperfect.
				38	break_long_words (default: true)
				39	Break words longer than the line width constraint. If false,
				40	those words will not be broken, and some lines might be longer
				41	than the width constraint.
Greg Ward	0093582	2002-06-07 21:43:37 +0000	[diff] [blame]	42	"""
				43
				44	whitespace_trans = string.maketrans(string.whitespace,
				45	' ' * len(string.whitespace))
				46
				47	# This funky little regex is just the trick for splitting
				48	# text up into word-wrappable chunks. E.g.
				49	# "Hello there -- you goof-ball, use the -b option!"
				50	# splits into
				51	# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
				52	# (after stripping out empty strings).
				53	wordsep_re = re.compile(r'(\s+\|' # any whitespace
				54	r'\w{2,}-(?=\w{2,})\|' # hyphenated words
				55	r'(?<=\w)-{2,}(?=\w))') # em-dash
				56
Greg Ward	62e4f3b	2002-06-07 21:56:16 +0000	[diff] [blame^]	57	# Punctuation characters found at the end of a sentence.
				58	sentence_end = ".?!"
				59
Greg Ward	0093582	2002-06-07 21:43:37 +0000	[diff] [blame]	60
				61	def __init__ (self):
				62	self.expand_tabs = 1
				63	self.replace_whitespace = 1
Greg Ward	62e4f3b	2002-06-07 21:56:16 +0000	[diff] [blame^]	64	self.fix_sentence_endings = 0
Greg Ward	0093582	2002-06-07 21:43:37 +0000	[diff] [blame]	65	self.break_long_words = 1
				66
				67
				68	# -- Private methods -----------------------------------------------
				69	# (possibly useful for subclasses to override)
				70
				71	def _munge_whitespace (self, text):
				72	"""_munge_whitespace(text : string) -> string
				73
				74	Munge whitespace in text: expand tabs and convert all other
				75	whitespace characters to spaces. Eg. " foo\tbar\n\nbaz"
				76	becomes " foo bar baz".
				77	"""
				78	if self.expand_tabs:
				79	text = text.expandtabs()
				80	if self.replace_whitespace:
				81	text = text.translate(self.whitespace_trans)
				82	return text
				83
				84
				85	def _split (self, text):
				86	"""_split(text : string) -> [string]
				87
				88	Split the text to wrap into indivisible chunks. Chunks are
				89	not quite the same as words; see wrap_chunks() for full
				90	details. As an example, the text
				91	Look, goof-ball -- use the -b option!
				92	breaks into the following chunks:
				93	'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
				94	'use', ' ', 'the', ' ', '-b', ' ', 'option!'
				95	"""
				96	chunks = self.wordsep_re.split(text)
				97	chunks = filter(None, chunks)
				98	return chunks
				99
				100	def _fix_sentence_endings (self, chunks):
				101	"""_fix_sentence_endings(chunks : [string])
				102
				103	Correct for sentence endings buried in 'chunks'. Eg. when the
				104	original text contains "... foo.\nBar ...", munge_whitespace()
				105	and split() will convert that to [..., "foo.", " ", "Bar", ...]
				106	which has one too few spaces; this method simply changes the one
				107	space to two.
				108	"""
				109	i = 0
Greg Ward	62e4f3b	2002-06-07 21:56:16 +0000	[diff] [blame^]	110	punct = self.sentence_end
Greg Ward	0093582	2002-06-07 21:43:37 +0000	[diff] [blame]	111	while i < len(chunks)-1:
				112	# chunks[i] looks like the last word of a sentence,
				113	# and it's followed by a single space.
Greg Ward	62e4f3b	2002-06-07 21:56:16 +0000	[diff] [blame^]	114	if (chunks[i][-1] in punct and
Greg Ward	0093582	2002-06-07 21:43:37 +0000	[diff] [blame]	115	chunks[i+1] == " " and
				116	islower(chunks[i][-2])):
				117	chunks[i+1] = " "
				118	i += 2
				119	else:
				120	i += 1
				121
				122	def _handle_long_word (self, chunks, cur_line, cur_len, width):
				123	"""_handle_long_word(chunks : [string],
				124	cur_line : [string],
				125	cur_len : int, width : int)
				126
				127	Handle a chunk of text (most likely a word, not whitespace) that
				128	is too long to fit in any line.
				129	"""
				130	space_left = width - cur_len
				131
				132	# If we're allowed to break long words, then do so: put as much
				133	# of the next chunk onto the current line as will fit.
				134	if self.break_long_words:
				135	cur_line.append(chunks[0][0:space_left])
				136	chunks[0] = chunks[0][space_left:]
				137
				138	# Otherwise, we have to preserve the long word intact. Only add
				139	# it to the current line if there's nothing already there --
				140	# that minimizes how much we violate the width constraint.
				141	elif not cur_line:
				142	cur_line.append(chunks.pop(0))
				143
				144	# If we're not allowed to break long words, and there's already
				145	# text on the current line, do nothing. Next time through the
				146	# main loop of _wrap_chunks(), we'll wind up here again, but
				147	# cur_len will be zero, so the next line will be entirely
				148	# devoted to the long word that we can't handle right now.
				149
				150	def _wrap_chunks (self, chunks, width):
				151	"""_wrap_chunks(chunks : [string], width : int) -> [string]
				152
				153	Wrap a sequence of text chunks and return a list of lines of
				154	length 'width' or less. (If 'break_long_words' is false, some
				155	lines may be longer than 'width'.) Chunks correspond roughly to
				156	words and the whitespace between them: each chunk is indivisible
				157	(modulo 'break_long_words'), but a line break can come between
				158	any two chunks. Chunks should not have internal whitespace;
				159	ie. a chunk is either all whitespace or a "word". Whitespace
				160	chunks will be removed from the beginning and end of lines, but
				161	apart from that whitespace is preserved.
				162	"""
				163	lines = []
				164
				165	while chunks:
				166
				167	cur_line = [] # list of chunks (to-be-joined)
				168	cur_len = 0 # length of current line
				169
				170	# First chunk on line is whitespace -- drop it.
				171	if chunks[0].strip() == '':
				172	del chunks[0]
				173
				174	while chunks:
				175	l = len(chunks[0])
				176
				177	# Can at least squeeze this chunk onto the current line.
				178	if cur_len + l <= width:
				179	cur_line.append(chunks.pop(0))
				180	cur_len += l
				181
				182	# Nope, this line is full.
				183	else:
				184	break
				185
				186	# The current line is full, and the next chunk is too big to
				187	# fit on any line (not just this one).
				188	if chunks and len(chunks[0]) > width:
				189	self._handle_long_word(chunks, cur_line, cur_len, width)
				190
				191	# If the last chunk on this line is all whitespace, drop it.
				192	if cur_line and cur_line[-1].strip() == '':
				193	del cur_line[-1]
				194
				195	# Convert current line back to a string and store it in list
				196	# of all lines (return value).
				197	if cur_line:
				198	lines.append(''.join(cur_line))
				199
				200	return lines
				201
				202
				203	# -- Public interface ----------------------------------------------
				204
				205	def wrap (self, text, width):
				206	"""wrap(text : string, width : int) -> [string]
				207
				208	Split 'text' into multiple lines of no more than 'width'
				209	characters each, and return the list of strings that results.
				210	Tabs in 'text' are expanded with string.expandtabs(), and all
				211	other whitespace characters (including newline) are converted to
				212	space.
				213	"""
				214	text = self._munge_whitespace(text)
				215	if len(text) <= width:
				216	return [text]
				217	chunks = self._split(text)
Greg Ward	62e4f3b	2002-06-07 21:56:16 +0000	[diff] [blame^]	218	if self.fix_sentence_endings:
				219	self._fix_sentence_endings(chunks)
Greg Ward	0093582	2002-06-07 21:43:37 +0000	[diff] [blame]	220	return self._wrap_chunks(chunks, width)
				221
				222	def fill (self, text, width, initial_tab="", subsequent_tab=""):
				223	"""fill(text : string,
				224	width : int,
				225	initial_tab : string = "",
				226	subsequent_tab : string = "")
				227	-> string
				228
				229	Reformat the paragraph in 'text' to fit in lines of no more than
				230	'width' columns. The first line is prefixed with 'initial_tab',
				231	and subsequent lines are prefixed with 'subsequent_tab'; the
				232	lengths of the tab strings are accounted for when wrapping lines
				233	to fit in 'width' columns.
				234	"""
				235	lines = self.wrap(text, width)
				236	sep = "\n" + subsequent_tab
				237	return initial_tab + sep.join(lines)
				238
				239
# Convenience interface

# Shared TextWrapper instance, created once at import time with the
# class's default settings.
_wrapper = TextWrapper()
				243
				244	def wrap (text, width):
				245	return _wrapper.wrap(text, width)
				246
				247	def fill (text, width, initial_tab="", subsequent_tab=""):
				248	return _wrapper.fill(text, width, initial_tab, subsequent_tab)