Blame - clang/lib/Format/BreakableToken.h - toolchain/llvm-project

blob: 5fab3f2f1145aa3ebb6d837f3f1bb58bc40d0cc5 [file] [log] [blame]

Erik Pilkington	7adcf29	2018-07-24 00:07:49 +0000	[diff] [blame]	1	//===--- BreakableToken.h - Format C++ code ---------------------- C++ --===//
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	///
				9	/// \file
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	10	/// Declares BreakableToken, BreakableStringLiteral, BreakableComment,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	11	/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
				12	/// token type-specific logic to break long lines in tokens and reflow content
				13	/// between tokens.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	14	///
				15	//===----------------------------------------------------------------------===//
				16
Benjamin Kramer	2f5db8b	2014-08-13 16:25:19 +0000	[diff] [blame]	17	#ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
				18	#define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	19
Alexander Kornienko	ffcc010	2013-06-05 14:09:10 +0000	[diff] [blame]	20	#include "Encoding.h"
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	21	#include "TokenAnnotator.h"
				22	#include "WhitespaceManager.h"
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	23	#include "llvm/ADT/StringSet.h"
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	24	#include "llvm/Support/Regex.h"
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	25	#include <utility>
				26
				27	namespace clang {
				28	namespace format {
				29
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	30	/// Checks if \p Token switches formatting, like /* clang-format off */.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	31	/// \p Token must be a comment.
				32	bool switchesFormatting(const FormatToken &Token);
				33
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	34	struct FormatStyle;
				35
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	36	/// Base class for tokens / ranges of tokens that can allow breaking
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	37	/// within the tokens - for example, to avoid whitespace beyond the column
				38	/// limit, or to reflow text.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	39	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	40	/// Generally, a breakable token consists of logical lines, addressed by a line
				41	/// index. For example, in a sequence of line comments, each line comment is its
				42	/// own logical line; similarly, for a block comment, each line in the block
				43	/// comment is on its own logical line.
				44	///
				45	/// There are two methods to compute the layout of the token:
				46	/// - getRangeLength measures the number of columns needed for a range of text
				47	/// within a logical line, and
				48	/// - getContentStartColumn returns the start column at which we want the
				49	/// content of a logical line to start (potentially after introducing a line
				50	/// break).
				51	///
				52	/// The mechanism to adapt the layout of the breakable token is organised
				53	/// around the concept of a \c Split, which is a whitespace range that signifies
				54	/// a position of the content of a token where a reformatting might be done.
				55	///
				56	/// Operating with splits is divided into two operations:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	57	/// - getSplit, for finding a split starting at a position,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	58	/// - insertBreak, for executing the split using a whitespace manager.
				59	///
				60	/// There is a pair of operations that are used to compress a long whitespace
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	61	/// range with a single space if that will bring the line length under the
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	62	/// column limit:
				63	/// - getLengthAfterCompression, for calculating the size in columns of the
				64	/// line after a whitespace range has been compressed, and
				65	/// - compressWhitespace, for executing the whitespace compression using a
				66	/// whitespace manager; note that the compressed whitespace may be in the
				67	/// middle of the original line and of the reformatted line.
				68	///
				69	/// For tokens where the whitespace before each line needs to be also
				70	/// reformatted, for example for tokens supporting reflow, there are analogous
				71	/// operations that might be executed before the main line breaking occurs:
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	72	/// - getReflowSplit, for finding a split such that the content preceding it
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	73	/// needs to be specially reflown,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	74	/// - reflow, for executing the split using a whitespace manager,
Krasimir Georgiev	35599fd	2017-10-16 09:08:53 +0000	[diff] [blame]	75	/// - introducesBreakBeforeToken, for checking if reformatting the beginning
				76	/// of the content introduces a line break before it,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	77	/// - adaptStartOfLine, for executing the reflow using a whitespace
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	78	/// manager.
				79	///
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	80	/// For tokens that require the whitespace after the last line to be
				81	/// reformatted, for example in multiline jsdoc comments that require the
				82	/// trailing '*/' to be on a line of itself, there are analogous operations
				83	/// that might be executed after the last line has been reformatted:
				84	/// - getSplitAfterLastLine, for finding a split after the last line that needs
				85	/// to be reflown,
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	86	/// - replaceWhitespaceAfterLastLine, for executing the reflow using a
				87	/// whitespace manager.
				88	///
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	89	class BreakableToken {
				90	public:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	91	/// Contains starting character index and length of split.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	92	typedef std::pair<StringRef::size_type, unsigned> Split;
				93
Angel Garcia Gomez	637d1e6	2015-10-20 13:23:58 +0000	[diff] [blame]	94	virtual ~BreakableToken() {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	95
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	96	/// Returns the number of lines in this token in the original code.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	97	virtual unsigned getLineCount() const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	98
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	99	/// Returns the number of columns required to format the text in the
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	100	/// byte range [\p Offset, \p Offset \c + \p Length).
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	101	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	102	/// \p Offset is the byte offset from the start of the content of the line
				103	/// at \p LineIndex.
				104	///
				105	/// \p StartColumn is the column at which the text starts in the formatted
				106	/// file, needed to compute tab stops correctly.
				107	virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				108	StringRef::size_type Length,
				109	unsigned StartColumn) const = 0;
				110
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	111	/// Returns the number of columns required to format the text following
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	112	/// the byte \p Offset in the line \p LineIndex, including potentially
				113	/// unbreakable sequences of tokens following after the end of the token.
				114	///
				115	/// \p Offset is the byte offset from the start of the content of the line
				116	/// at \p LineIndex.
				117	///
				118	/// \p StartColumn is the column at which the text starts in the formatted
				119	/// file, needed to compute tab stops correctly.
				120	///
				121	/// For breakable tokens that never use extra space at the end of a line, this
				122	/// is equivalent to getRangeLength with a Length of StringRef::npos.
				123	virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				124	unsigned StartColumn) const {
				125	return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
				126	}
				127
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	128	/// Returns the column at which content in line \p LineIndex starts,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	129	/// assuming no reflow.
				130	///
				131	/// If \p Break is true, returns the column at which the line should start
				132	/// after the line break.
				133	/// If \p Break is false, returns the column at which the line itself will
				134	/// start.
				135	virtual unsigned getContentStartColumn(unsigned LineIndex,
				136	bool Break) const = 0;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	137
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	138	/// Returns additional content indent required for the second line after the
				139	/// content at line \p LineIndex is broken.
				140	///
Krasimir Georgiev	e3424bf	2018-07-30 12:22:41 +0000	[diff] [blame]	141	// (Next lines do not start with `///` since otherwise -Wdocumentation picks
				142	// up the example annotations and generates warnings for them)
				143	// For example, Javadoc @param annotations require an indent of 4 spaces and
				144	// in this example getContentIndent(1) returns 4.
				145	// /**
				146	// * @param loooooooooooooong line
				147	// * continuation
				148	// */
Paul Hoad	5bcf99b	2019-03-01 09:09:54 +0000	[diff] [blame]	149	virtual unsigned getContentIndent(unsigned LineIndex) const { return 0; }
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	150
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	151	/// Returns a range (offset, length) at which to break the line at
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	152	/// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	153	/// violate \p ColumnLimit, assuming the text starting at \p TailOffset in
				154	/// the token is formatted starting at ContentStartColumn in the reformatted
				155	/// file.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	156	virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	157	unsigned ColumnLimit, unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	158	llvm::Regex &CommentPragmasRegex) const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	159
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	160	/// Emits the previously retrieved \p Split via \p Whitespaces.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	161	virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	162	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	163	WhitespaceManager &Whitespaces) const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	164
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	165	/// Returns the number of columns needed to format
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	166	/// \p RemainingTokenColumns, assuming that Split is within the range measured
				167	/// by \p RemainingTokenColumns, and that the whitespace in Split is reduced
				168	/// to a single space.
				169	unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,
				170	Split Split) const;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	171
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	172	/// Replaces the whitespace range described by \p Split with a single
Alexander Kornienko	875395f	2013-11-12 17:50:13 +0000	[diff] [blame]	173	/// space.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	174	virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
				175	Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	176	WhitespaceManager &Whitespaces) const = 0;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	177
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	178	/// Returns whether the token supports reflowing text.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	179	virtual bool supportsReflow() const { return false; }
				180
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	181	/// Returns a whitespace range (offset, length) of the content at \p
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	182	/// LineIndex such that the content of that line is reflown to the end of the
				183	/// previous one.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	184	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	185	/// Returning (StringRef::npos, 0) indicates reflowing is not possible.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	186	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	187	/// The range will include any whitespace preceding the specified line's
				188	/// content.
				189	///
				190	/// If the split is not contained within one token, for example when reflowing
				191	/// line comments, returns (0, <length>).
				192	virtual Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	a7a24bf	2017-03-08 08:58:44 +0000	[diff] [blame]	193	llvm::Regex &CommentPragmasRegex) const {
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	194	return Split(StringRef::npos, 0);
				195	}
				196
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	197	/// Reflows the current line into the end of the previous one.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	198	virtual void reflow(unsigned LineIndex,
				199	WhitespaceManager &Whitespaces) const {}
				200
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	201	/// Returns whether there will be a line break at the start of the
Manuel Klimek	7786614	2017-11-17 11:17:15 +0000	[diff] [blame]	202	/// token.
Paul Hoad	5bcf99b	2019-03-01 09:09:54 +0000	[diff] [blame]	203	virtual bool introducesBreakBeforeToken() const { return false; }
Krasimir Georgiev	35599fd	2017-10-16 09:08:53 +0000	[diff] [blame]	204
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	205	/// Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	206	virtual void adaptStartOfLine(unsigned LineIndex,
				207	WhitespaceManager &Whitespaces) const {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	208
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	209	/// Returns a whitespace range (offset, length) of the content at
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	210	/// the last line that needs to be reformatted after the last line has been
				211	/// reformatted.
				212	///
				213	/// A result having offset == StringRef::npos means that no reformat is
				214	/// necessary.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	215	virtual Split getSplitAfterLastLine(unsigned TailOffset) const {
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	216	return Split(StringRef::npos, 0);
				217	}
				218
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	219	/// Replaces the whitespace from \p SplitAfterLastLine on the last line
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	220	/// after the last line has been formatted by performing a reformatting.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	221	void replaceWhitespaceAfterLastLine(unsigned TailOffset,
				222	Split SplitAfterLastLine,
				223	WhitespaceManager &Whitespaces) const {
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	224	insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	225	/*ContentIndent=*/0, Whitespaces);
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	226	}
				227
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	228	/// Updates the next token of \p State to the next token after this
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	229	/// one. This can be used when this token manages a set of underlying tokens
				230	/// as a unit and is responsible for the formatting of them.
				231	virtual void updateNextToken(LineState &State) const {}
				232
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	233	protected:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	234	BreakableToken(const FormatToken &Tok, bool InPPDirective,
				235	encoding::Encoding Encoding, const FormatStyle &Style)
				236	: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
				237	Style(Style) {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	238
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	239	const FormatToken &Tok;
Alexander Kornienko	be63390	2013-06-14 11:46:10 +0000	[diff] [blame]	240	const bool InPPDirective;
				241	const encoding::Encoding Encoding;
Alexander Kornienko	ebb43ca	2013-09-05 14:08:34 +0000	[diff] [blame]	242	const FormatStyle &Style;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	243	};
				244
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	245	class BreakableStringLiteral : public BreakableToken {
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	246	public:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	247	/// Creates a breakable token for a single line string literal.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	248	///
				249	/// \p StartColumn specifies the column in which the token will start
				250	/// after formatting.
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	251	BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
				252	StringRef Prefix, StringRef Postfix,
Krasimir Georgiev	55c23a1	2018-01-23 11:26:19 +0000	[diff] [blame]	253	unsigned UnbreakableTailLength, bool InPPDirective,
				254	encoding::Encoding Encoding, const FormatStyle &Style);
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	255
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	256	Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
Krasimir Georgiev	6bbc706	2018-04-23 10:02:59 +0000	[diff] [blame]	257	unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	258	llvm::Regex &CommentPragmasRegex) const override;
Craig Topper	fb6b25b	2014-03-15 04:29:04 +0000	[diff] [blame]	259	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	260	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	261	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	262	void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	263	WhitespaceManager &Whitespaces) const override {}
				264	unsigned getLineCount() const override;
				265	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				266	StringRef::size_type Length,
				267	unsigned StartColumn) const override;
				268	unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				269	unsigned StartColumn) const override;
				270	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
				271
				272	protected:
				273	// The column in which the token starts.
				274	unsigned StartColumn;
				275	// The prefix a line needs after a break in the token.
				276	StringRef Prefix;
				277	// The postfix a line needs before introducing a break.
				278	StringRef Postfix;
				279	// The token text excluding the prefix and postfix.
				280	StringRef Line;
				281	// Length of the sequence of tokens after this string literal that cannot
				282	// contain line breaks.
				283	unsigned UnbreakableTailLength;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	284	};
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	285
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	286	class BreakableComment : public BreakableToken {
				287	protected:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	288	/// Creates a breakable token for a comment.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	289	///
Krasimir Georgiev	4b15922	2017-02-21 10:54:50 +0000	[diff] [blame]	290	/// \p StartColumn specifies the column in which the comment will start after
				291	/// formatting.
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	292	BreakableComment(const FormatToken &Token, unsigned StartColumn,
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	293	bool InPPDirective, encoding::Encoding Encoding,
				294	const FormatStyle &Style);
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	295
				296	public:
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	297	bool supportsReflow() const override { return true; }
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	298	unsigned getLineCount() const override;
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	299	Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
Krasimir Georgiev	6bbc706	2018-04-23 10:02:59 +0000	[diff] [blame]	300	unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	301	llvm::Regex &CommentPragmasRegex) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	302	void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	303	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	304
				305	protected:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	306	// Returns the token containing the line at LineIndex.
				307	const FormatToken &tokenAt(unsigned LineIndex) const;
				308
				309	// Checks if the content of line LineIndex may be reflown with the previous
				310	// line.
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	311	virtual bool mayReflow(unsigned LineIndex,
				312	llvm::Regex &CommentPragmasRegex) const = 0;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	313
				314	// Contains the original text of the lines of the block comment.
				315	//
				316	// In case of block comments, excludes the leading /* in the first line and
				317	// trailing */ in the last line. In case of line comments, excludes the
				318	// leading // and spaces.
				319	SmallVector<StringRef, 16> Lines;
				320
				321	// Contains the text of the lines excluding all leading and trailing
				322	// whitespace between the lines. Note that the decoration (if present) is also
				323	// not considered part of the text.
				324	SmallVector<StringRef, 16> Content;
				325
				326	// Tokens[i] contains a reference to the token containing Lines[i] if the
				327	// whitespace range before that token is managed by this block.
				328	// Otherwise, Tokens[i] is a null pointer.
				329	SmallVector<FormatToken *, 16> Tokens;
				330
				331	// ContentColumn[i] is the target column at which Content[i] should be.
				332	// Note that this excludes a leading "* " or "*" in case of block comments
				333	// where all lines have a "*" prefix, or the leading "// " or "//" in case of
				334	// line comments.
				335	//
				336	// In block comments, the first line's target column is always positive. The
				337	// remaining lines' target columns are relative to the first line to allow
				338	// correct indentation of comments in \c WhitespaceManager. Thus they can be
				339	// negative as well (in case the first line needs to be unindented more than
				340	// there's actual whitespace in another line).
				341	SmallVector<int, 16> ContentColumn;
				342
				343	// The intended start column of the first line of text from this section.
				344	unsigned StartColumn;
				345
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	346	// The prefix to use in front of a line that has been reflown up.
				347	// For example, when reflowing the second line after the first here:
				348	// // comment 1
				349	// // comment 2
				350	// we expect:
				351	// // comment 1 comment 2
				352	// and not:
				353	// // comment 1comment 2
				354	StringRef ReflowPrefix = " ";
				355	};
				356
				357	class BreakableBlockComment : public BreakableComment {
				358	public:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	359	BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
				360	unsigned OriginalStartColumn, bool FirstInLine,
				361	bool InPPDirective, encoding::Encoding Encoding,
				362	const FormatStyle &Style);
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	363
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	364	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				365	StringRef::size_type Length,
				366	unsigned StartColumn) const override;
				367	unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				368	unsigned StartColumn) const override;
				369	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	370	unsigned getContentIndent(unsigned LineIndex) const override;
Craig Topper	fb6b25b	2014-03-15 04:29:04 +0000	[diff] [blame]	371	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	372	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	373	WhitespaceManager &Whitespaces) const override;
				374	Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	375	llvm::Regex &CommentPragmasRegex) const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	376	void reflow(unsigned LineIndex,
				377	WhitespaceManager &Whitespaces) const override;
Manuel Klimek	7786614	2017-11-17 11:17:15 +0000	[diff] [blame]	378	bool introducesBreakBeforeToken() const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	379	void adaptStartOfLine(unsigned LineIndex,
				380	WhitespaceManager &Whitespaces) const override;
				381	Split getSplitAfterLastLine(unsigned TailOffset) const override;
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	382
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	383	bool mayReflow(unsigned LineIndex,
				384	llvm::Regex &CommentPragmasRegex) const override;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	385
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	386	// Contains Javadoc annotations that require additional indent when continued
				387	// on multiple lines.
				388	static const llvm::StringSet<> ContentIndentingJavadocAnnotations;
				389
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	390	private:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	391	// Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	392	//
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	393	// Updates Content[LineIndex-1] and Content[LineIndex] by stripping off
				394	// leading and trailing whitespace.
				395	//
				396	// Sets ContentColumn to the intended column in which the text at
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	397	// Lines[LineIndex] starts (note that the decoration, if present, is not
				398	// considered part of the text).
Alexander Kornienko	ebb43ca	2013-09-05 14:08:34 +0000	[diff] [blame]	399	void adjustWhitespace(unsigned LineIndex, int IndentDelta);
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	400
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	401	// The column at which the text of a broken line should start.
				402	// Note that an optional decoration would go before that column.
				403	// IndentAtLineBreak is a uniform position for all lines in a block comment,
				404	// regardless of their relative position.
				405	// FIXME: Revisit the decision to do this; the main reason was to support
				406	// patterns like
				407	// /************//
				408	// * Comment
				409	// We could also support such patterns by special casing the first line
				410	// instead.
				411	unsigned IndentAtLineBreak;
				412
Alexander Kornienko	614d96a	2013-07-08 14:12:07 +0000	[diff] [blame]	413	// This is to distinguish between the case when the last line was empty and
				414	// the case when it started with a decoration ("*" or "* ").
				415	bool LastLineNeedsDecoration;
				416
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	417	// Either "* " if all lines begin with a "*", or empty.
				418	StringRef Decoration;
Krasimir Georgiev	bb99a36	2017-02-16 12:39:31 +0000	[diff] [blame]	419
				420	// If this block comment has decorations, this is the column of the start of
				421	// the decorations.
				422	unsigned DecorationColumn;
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	423
				424	// If true, make sure that the opening '/**' and the closing '*/' ends on a
				425	// line of itself. Styles like jsdoc require this for multiline comments.
				426	bool DelimitersOnNewline;
Manuel Klimek	48c930c	2017-12-04 08:53:16 +0000	[diff] [blame]	427
				428	// Length of the sequence of tokens after this block comment that cannot
				429	// contain line breaks.
				430	unsigned UnbreakableTailLength;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	431	};
				432
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	433	class BreakableLineCommentSection : public BreakableComment {
				434	public:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	435	BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	436	unsigned OriginalStartColumn, bool FirstInLine,
				437	bool InPPDirective, encoding::Encoding Encoding,
				438	const FormatStyle &Style);
				439
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	440	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				441	StringRef::size_type Length,
				442	unsigned StartColumn) const override;
				443	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	444	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	445	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	446	WhitespaceManager &Whitespaces) const override;
				447	Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	448	llvm::Regex &CommentPragmasRegex) const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	449	void reflow(unsigned LineIndex,
				450	WhitespaceManager &Whitespaces) const override;
				451	void adaptStartOfLine(unsigned LineIndex,
				452	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	a7a24bf	2017-03-08 08:58:44 +0000	[diff] [blame]	453	void updateNextToken(LineState &State) const override;
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	454	bool mayReflow(unsigned LineIndex,
				455	llvm::Regex &CommentPragmasRegex) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	456
				457	private:
Krasimir Georgiev	2091a3a	2017-02-08 14:45:19 +0000	[diff] [blame]	458	// OriginalPrefix[i] contains the original prefix of line i, including
				459	// trailing whitespace before the start of the content. The indentation
				460	// preceding the prefix is not included.
				461	// For example, if the line is:
				462	// // content
				463	// then the original prefix is "// ".
				464	SmallVector<StringRef, 16> OriginalPrefix;
				465
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	466	// Prefix[i] contains the intended leading "//" with trailing spaces to
				467	// account for the indentation of content within the comment at line i after
				468	// formatting. It can be different than the original prefix when the original
				469	// line starts like this:
				470	// //content
				471	// Then the original prefix is "//", but the prefix is "// ".
				472	SmallVector<StringRef, 16> Prefix;
				473
				474	SmallVector<unsigned, 16> OriginalContentColumn;
				475
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	476	/// The token that the last line of this breakable token belongs
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	477	/// to; nullptr if that token is the initial token.
				478	///
				479	/// The distinction is because if the token of the last line of this breakable
				480	/// token is distinct from the initial token, this breakable token owns the
				481	/// whitespace before the token of the last line, and the whitespace manager
				482	/// must be able to modify it.
				483	FormatToken *LastLineTok = nullptr;
				484	};
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	485	} // namespace format
				486	} // namespace clang
				487
Benjamin Kramer	2f5db8b	2014-08-13 16:25:19 +0000	[diff] [blame]	488	#endif