Blame - clang/lib/Format/BreakableToken.h - toolchain/llvm-project

blob: a2a818f91148ef87451226082304c3156cc6503a [file] [log] [blame]

Erik Pilkington	7adcf29	2018-07-24 00:07:49 +0000	[diff] [blame]	1	//===--- BreakableToken.h - Format C++ code ---------------------*- C++ -*-===//
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	///
				10	/// \file
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	11	/// Declares BreakableToken, BreakableStringLiteral, BreakableComment,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	12	/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
				13	/// token type-specific logic to break long lines in tokens and reflow content
				14	/// between tokens.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	15	///
				16	//===----------------------------------------------------------------------===//
				17
Benjamin Kramer	2f5db8b	2014-08-13 16:25:19 +0000	[diff] [blame]	18	#ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
				19	#define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	20
Alexander Kornienko	ffcc010	2013-06-05 14:09:10 +0000	[diff] [blame]	21	#include "Encoding.h"
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	22	#include "TokenAnnotator.h"
				23	#include "WhitespaceManager.h"
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	24	#include "llvm/ADT/StringSet.h"
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	25	#include "llvm/Support/Regex.h"
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	26	#include <utility>
				27
				28	namespace clang {
				29	namespace format {
				30
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	31	/// Checks if \p Token switches formatting, like /* clang-format off */.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	32	/// \p Token must be a comment.
				33	bool switchesFormatting(const FormatToken &Token);
				34
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	35	struct FormatStyle;
				36
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	37	/// Base class for tokens / ranges of tokens that can allow breaking
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	38	/// within the tokens - for example, to avoid whitespace beyond the column
				39	/// limit, or to reflow text.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	40	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	41	/// Generally, a breakable token consists of logical lines, addressed by a line
				42	/// index. For example, in a sequence of line comments, each line comment is its
				43	/// own logical line; similarly, for a block comment, each line in the block
				44	/// comment is on its own logical line.
				45	///
				46	/// There are two methods to compute the layout of the token:
				47	/// - getRangeLength measures the number of columns needed for a range of text
				48	/// within a logical line, and
				49	/// - getContentStartColumn returns the start column at which we want the
				50	/// content of a logical line to start (potentially after introducing a line
				51	/// break).
				52	///
				53	/// The mechanism to adapt the layout of the breakable token is organised
				54	/// around the concept of a \c Split, which is a whitespace range that signifies
				55	/// a position of the content of a token where a reformatting might be done.
				56	///
				57	/// Operating with splits is divided into two operations:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	58	/// - getSplit, for finding a split starting at a position,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	59	/// - insertBreak, for executing the split using a whitespace manager.
				60	///
				61	/// There is a pair of operations that are used to compress a long whitespace
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	62	/// range with a single space if that will bring the line length under the
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	63	/// column limit:
				64	/// - getLineLengthAfterCompression, for calculating the size in columns of the
				65	/// line after a whitespace range has been compressed, and
				66	/// - compressWhitespace, for executing the whitespace compression using a
				67	/// whitespace manager; note that the compressed whitespace may be in the
				68	/// middle of the original line and of the reformatted line.
				69	///
				70	/// For tokens where the whitespace before each line needs to be also
				71	/// reformatted, for example for tokens supporting reflow, there are analogous
				72	/// operations that might be executed before the main line breaking occurs:
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	73	/// - getReflowSplit, for finding a split such that the content preceding it
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	74	/// needs to be specially reflown,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	75	/// - reflow, for executing the split using a whitespace manager,
Krasimir Georgiev	35599fd	2017-10-16 09:08:53 +0000	[diff] [blame]	76	/// - introducesBreakBefore, for checking if reformatting the beginning
				77	/// of the content introduces a line break before it,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	78	/// - adaptStartOfLine, for executing the reflow using a whitespace
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	79	/// manager.
				80	///
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	81	/// For tokens that require the whitespace after the last line to be
				82	/// reformatted, for example in multiline jsdoc comments that require the
				83	/// trailing '*/' to be on a line of itself, there are analogous operations
				84	/// that might be executed after the last line has been reformatted:
				85	/// - getSplitAfterLastLine, for finding a split after the last line that needs
				86	/// to be reflown,
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	87	/// - replaceWhitespaceAfterLastLine, for executing the reflow using a
				88	/// whitespace manager.
				89	///
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	90	class BreakableToken {
				91	public:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	92	/// Contains starting character index and length of split.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	93	typedef std::pair<StringRef::size_type, unsigned> Split;
				94
Angel Garcia Gomez	637d1e6	2015-10-20 13:23:58 +0000	[diff] [blame]	95	virtual ~BreakableToken() {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	96
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	97	/// Returns the number of lines in this token in the original code.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	98	virtual unsigned getLineCount() const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	99
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	100	/// Returns the number of columns required to format the text in the
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	101	/// byte range [\p Offset, \p Offset \c + \p Length).
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	102	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	103	/// \p Offset is the byte offset from the start of the content of the line
				104	/// at \p LineIndex.
				105	///
				106	/// \p StartColumn is the column at which the text starts in the formatted
				107	/// file, needed to compute tab stops correctly.
				108	virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				109	StringRef::size_type Length,
				110	unsigned StartColumn) const = 0;
				111
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	112	/// Returns the number of columns required to format the text following
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	113	/// the byte \p Offset in the line \p LineIndex, including potentially
				114	/// unbreakable sequences of tokens following after the end of the token.
				115	///
				116	/// \p Offset is the byte offset from the start of the content of the line
				117	/// at \p LineIndex.
				118	///
				119	/// \p StartColumn is the column at which the text starts in the formatted
				120	/// file, needed to compute tab stops correctly.
				121	///
				122	/// For breakable tokens that never use extra space at the end of a line, this
				123	/// is equivalent to getRangeLength with a Length of StringRef::npos.
				124	virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				125	unsigned StartColumn) const {
				126	return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
				127	}
				128
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	129	/// Returns the column at which content in line \p LineIndex starts,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	130	/// assuming no reflow.
				131	///
				132	/// If \p Break is true, returns the column at which the line should start
				133	/// after the line break.
				134	/// If \p Break is false, returns the column at which the line itself will
				135	/// start.
				136	virtual unsigned getContentStartColumn(unsigned LineIndex,
				137	bool Break) const = 0;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	138
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	139	/// Returns additional content indent required for the second line after the
				140	/// content at line \p LineIndex is broken.
				141	///
				142	/// For example, Javadoc @param annotations require and indent of 4 spaces and
				143	/// in this example getContentIndex(1) returns 4.
				144	/// /**
				145	/// * @param loooooooooooooong line
				146	/// * continuation
				147	/// */
				148	virtual unsigned getContentIndent(unsigned LineIndex) const {
				149	return 0;
				150	}
				151
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	152	/// Returns a range (offset, length) at which to break the line at
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	153	/// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	154	/// violate \p ColumnLimit, assuming the text starting at \p TailOffset in
				155	/// the token is formatted starting at ContentStartColumn in the reformatted
				156	/// file.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	157	virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	158	unsigned ColumnLimit, unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	159	llvm::Regex &CommentPragmasRegex) const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	160
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	161	/// Emits the previously retrieved \p Split via \p Whitespaces.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	162	virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	163	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	164	WhitespaceManager &Whitespaces) const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	165
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	166	/// Returns the number of columns needed to format
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	167	/// \p RemainingTokenColumns, assuming that Split is within the range measured
				168	/// by \p RemainingTokenColumns, and that the whitespace in Split is reduced
				169	/// to a single space.
				170	unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,
				171	Split Split) const;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	172
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	173	/// Replaces the whitespace range described by \p Split with a single
Alexander Kornienko	875395f	2013-11-12 17:50:13 +0000	[diff] [blame]	174	/// space.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	175	virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
				176	Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	177	WhitespaceManager &Whitespaces) const = 0;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	178
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	179	/// Returns whether the token supports reflowing text.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	180	virtual bool supportsReflow() const { return false; }
				181
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	182	/// Returns a whitespace range (offset, length) of the content at \p
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	183	/// LineIndex such that the content of that line is reflown to the end of the
				184	/// previous one.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	185	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	186	/// Returning (StringRef::npos, 0) indicates reflowing is not possible.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	187	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	188	/// The range will include any whitespace preceding the specified line's
				189	/// content.
				190	///
				191	/// If the split is not contained within one token, for example when reflowing
				192	/// line comments, returns (0, <length>).
				193	virtual Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	a7a24bf	2017-03-08 08:58:44 +0000	[diff] [blame]	194	llvm::Regex &CommentPragmasRegex) const {
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	195	return Split(StringRef::npos, 0);
				196	}
				197
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	198	/// Reflows the current line into the end of the previous one.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	199	virtual void reflow(unsigned LineIndex,
				200	WhitespaceManager &Whitespaces) const {}
				201
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	202	/// Returns whether there will be a line break at the start of the
Manuel Klimek	7786614	2017-11-17 11:17:15 +0000	[diff] [blame]	203	/// token.
				204	virtual bool introducesBreakBeforeToken() const {
Krasimir Georgiev	35599fd	2017-10-16 09:08:53 +0000	[diff] [blame]	205	return false;
				206	}
				207
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	208	/// Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	209	virtual void adaptStartOfLine(unsigned LineIndex,
				210	WhitespaceManager &Whitespaces) const {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	211
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	212	/// Returns a whitespace range (offset, length) of the content at
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	213	/// the last line that needs to be reformatted after the last line has been
				214	/// reformatted.
				215	///
				216	/// A result having offset == StringRef::npos means that no reformat is
				217	/// necessary.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	218	virtual Split getSplitAfterLastLine(unsigned TailOffset) const {
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	219	return Split(StringRef::npos, 0);
				220	}
				221
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	222	/// Replaces the whitespace from \p SplitAfterLastLine on the last line
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	223	/// after the last line has been formatted by performing a reformatting.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	224	void replaceWhitespaceAfterLastLine(unsigned TailOffset,
				225	Split SplitAfterLastLine,
				226	WhitespaceManager &Whitespaces) const {
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	227	insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	228	/ContentIndent=/0, Whitespaces);
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	229	}
				230
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	231	/// Updates the next token of \p State to the next token after this
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	232	/// one. This can be used when this token manages a set of underlying tokens
				233	/// as a unit and is responsible for the formatting of the them.
				234	virtual void updateNextToken(LineState &State) const {}
				235
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	236	protected:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	237	BreakableToken(const FormatToken &Tok, bool InPPDirective,
				238	encoding::Encoding Encoding, const FormatStyle &Style)
				239	: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
				240	Style(Style) {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	241
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	242	const FormatToken &Tok;
Alexander Kornienko	be63390	2013-06-14 11:46:10 +0000	[diff] [blame]	243	const bool InPPDirective;
				244	const encoding::Encoding Encoding;
Alexander Kornienko	ebb43ca	2013-09-05 14:08:34 +0000	[diff] [blame]	245	const FormatStyle &Style;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	246	};
				247
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	248	class BreakableStringLiteral : public BreakableToken {
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	249	public:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	250	/// Creates a breakable token for a single line string literal.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	251	///
				252	/// \p StartColumn specifies the column in which the token will start
				253	/// after formatting.
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	254	BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
				255	StringRef Prefix, StringRef Postfix,
Krasimir Georgiev	55c23a1	2018-01-23 11:26:19 +0000	[diff] [blame]	256	unsigned UnbreakableTailLength, bool InPPDirective,
				257	encoding::Encoding Encoding, const FormatStyle &Style);
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	258
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	259	Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
Krasimir Georgiev	6bbc706	2018-04-23 10:02:59 +0000	[diff] [blame]	260	unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	261	llvm::Regex &CommentPragmasRegex) const override;
Craig Topper	fb6b25b	2014-03-15 04:29:04 +0000	[diff] [blame]	262	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	263	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	264	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	265	void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	266	WhitespaceManager &Whitespaces) const override {}
				267	unsigned getLineCount() const override;
				268	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				269	StringRef::size_type Length,
				270	unsigned StartColumn) const override;
				271	unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				272	unsigned StartColumn) const override;
				273	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
				274
				275	protected:
				276	// The column in which the token starts.
				277	unsigned StartColumn;
				278	// The prefix a line needs after a break in the token.
				279	StringRef Prefix;
				280	// The postfix a line needs before introducing a break.
				281	StringRef Postfix;
				282	// The token text excluding the prefix and postfix.
				283	StringRef Line;
				284	// Length of the sequence of tokens after this string literal that cannot
				285	// contain line breaks.
				286	unsigned UnbreakableTailLength;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	287	};
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	288
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	289	class BreakableComment : public BreakableToken {
				290	protected:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	291	/// Creates a breakable token for a comment.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	292	///
Krasimir Georgiev	4b15922	2017-02-21 10:54:50 +0000	[diff] [blame]	293	/// \p StartColumn specifies the column in which the comment will start after
				294	/// formatting.
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	295	BreakableComment(const FormatToken &Token, unsigned StartColumn,
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	296	bool InPPDirective, encoding::Encoding Encoding,
				297	const FormatStyle &Style);
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	298
				299	public:
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	300	bool supportsReflow() const override { return true; }
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	301	unsigned getLineCount() const override;
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	302	Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
Krasimir Georgiev	6bbc706	2018-04-23 10:02:59 +0000	[diff] [blame]	303	unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	304	llvm::Regex &CommentPragmasRegex) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	305	void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	306	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	307
				308	protected:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	309	// Returns the token containing the line at LineIndex.
				310	const FormatToken &tokenAt(unsigned LineIndex) const;
				311
				312	// Checks if the content of line LineIndex may be reflown with the previous
				313	// line.
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	314	virtual bool mayReflow(unsigned LineIndex,
				315	llvm::Regex &CommentPragmasRegex) const = 0;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	316
				317	// Contains the original text of the lines of the block comment.
				318	//
				319	// In case of a block comments, excludes the leading /* in the first line and
				320	// trailing */ in the last line. In case of line comments, excludes the
				321	// leading // and spaces.
				322	SmallVector<StringRef, 16> Lines;
				323
				324	// Contains the text of the lines excluding all leading and trailing
				325	// whitespace between the lines. Note that the decoration (if present) is also
				326	// not considered part of the text.
				327	SmallVector<StringRef, 16> Content;
				328
				329	// Tokens[i] contains a reference to the token containing Lines[i] if the
				330	// whitespace range before that token is managed by this block.
				331	// Otherwise, Tokens[i] is a null pointer.
				332	SmallVector<FormatToken *, 16> Tokens;
				333
				334	// ContentColumn[i] is the target column at which Content[i] should be.
				335	// Note that this excludes a leading "* " or "*" in case of block comments
				336	// where all lines have a "*" prefix, or the leading "// " or "//" in case of
				337	// line comments.
				338	//
				339	// In block comments, the first line's target column is always positive. The
				340	// remaining lines' target columns are relative to the first line to allow
				341	// correct indentation of comments in \c WhitespaceManager. Thus they can be
				342	// negative as well (in case the first line needs to be unindented more than
				343	// there's actual whitespace in another line).
				344	SmallVector<int, 16> ContentColumn;
				345
				346	// The intended start column of the first line of text from this section.
				347	unsigned StartColumn;
				348
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	349	// The prefix to use in front a line that has been reflown up.
				350	// For example, when reflowing the second line after the first here:
				351	// // comment 1
				352	// // comment 2
				353	// we expect:
				354	// // comment 1 comment 2
				355	// and not:
				356	// // comment 1comment 2
				357	StringRef ReflowPrefix = " ";
				358	};
				359
				360	class BreakableBlockComment : public BreakableComment {
				361	public:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	362	BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
				363	unsigned OriginalStartColumn, bool FirstInLine,
				364	bool InPPDirective, encoding::Encoding Encoding,
				365	const FormatStyle &Style);
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	366
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	367	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				368	StringRef::size_type Length,
				369	unsigned StartColumn) const override;
				370	unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				371	unsigned StartColumn) const override;
				372	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	373	unsigned getContentIndent(unsigned LineIndex) const override;
Craig Topper	fb6b25b	2014-03-15 04:29:04 +0000	[diff] [blame]	374	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	375	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	376	WhitespaceManager &Whitespaces) const override;
				377	Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	378	llvm::Regex &CommentPragmasRegex) const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	379	void reflow(unsigned LineIndex,
				380	WhitespaceManager &Whitespaces) const override;
Manuel Klimek	7786614	2017-11-17 11:17:15 +0000	[diff] [blame]	381	bool introducesBreakBeforeToken() const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	382	void adaptStartOfLine(unsigned LineIndex,
				383	WhitespaceManager &Whitespaces) const override;
				384	Split getSplitAfterLastLine(unsigned TailOffset) const override;
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	385
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	386	bool mayReflow(unsigned LineIndex,
				387	llvm::Regex &CommentPragmasRegex) const override;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	388
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	389	// Contains Javadoc annotations that require additional indent when continued
				390	// on multiple lines.
				391	static const llvm::StringSet<> ContentIndentingJavadocAnnotations;
				392
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	393	private:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	394	// Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	395	//
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	396	// Updates Content[LineIndex-1] and Content[LineIndex] by stripping off
				397	// leading and trailing whitespace.
				398	//
				399	// Sets ContentColumn to the intended column in which the text at
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	400	// Lines[LineIndex] starts (note that the decoration, if present, is not
				401	// considered part of the text).
Alexander Kornienko	ebb43ca	2013-09-05 14:08:34 +0000	[diff] [blame]	402	void adjustWhitespace(unsigned LineIndex, int IndentDelta);
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	403
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	404	// The column at which the text of a broken line should start.
				405	// Note that an optional decoration would go before that column.
				406	// IndentAtLineBreak is a uniform position for all lines in a block comment,
				407	// regardless of their relative position.
				408	// FIXME: Revisit the decision to do this; the main reason was to support
				409	// patterns like
				410	// /************//
				411	// * Comment
				412	// We could also support such patterns by special casing the first line
				413	// instead.
				414	unsigned IndentAtLineBreak;
				415
Alexander Kornienko	614d96a	2013-07-08 14:12:07 +0000	[diff] [blame]	416	// This is to distinguish between the case when the last line was empty and
				417	// the case when it started with a decoration ("" or " ").
				418	bool LastLineNeedsDecoration;
				419
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	420	// Either "* " if all lines begin with a "*", or empty.
				421	StringRef Decoration;
Krasimir Georgiev	bb99a36	2017-02-16 12:39:31 +0000	[diff] [blame]	422
				423	// If this block comment has decorations, this is the column of the start of
				424	// the decorations.
				425	unsigned DecorationColumn;
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	426
				427	// If true, make sure that the opening '/*' and the closing '/' ends on a
				428	// line of itself. Styles like jsdoc require this for multiline comments.
				429	bool DelimitersOnNewline;
Manuel Klimek	48c930c	2017-12-04 08:53:16 +0000	[diff] [blame]	430
				431	// Length of the sequence of tokens after this string literal that cannot
				432	// contain line breaks.
				433	unsigned UnbreakableTailLength;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	434	};
				435
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	436	class BreakableLineCommentSection : public BreakableComment {
				437	public:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	438	BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	439	unsigned OriginalStartColumn, bool FirstInLine,
				440	bool InPPDirective, encoding::Encoding Encoding,
				441	const FormatStyle &Style);
				442
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	443	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				444	StringRef::size_type Length,
				445	unsigned StartColumn) const override;
				446	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	447	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	448	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	449	WhitespaceManager &Whitespaces) const override;
				450	Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	451	llvm::Regex &CommentPragmasRegex) const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	452	void reflow(unsigned LineIndex,
				453	WhitespaceManager &Whitespaces) const override;
				454	void adaptStartOfLine(unsigned LineIndex,
				455	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	a7a24bf	2017-03-08 08:58:44 +0000	[diff] [blame]	456	void updateNextToken(LineState &State) const override;
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	457	bool mayReflow(unsigned LineIndex,
				458	llvm::Regex &CommentPragmasRegex) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	459
				460	private:
Krasimir Georgiev	2091a3a	2017-02-08 14:45:19 +0000	[diff] [blame]	461	// OriginalPrefix[i] contains the original prefix of line i, including
				462	// trailing whitespace before the start of the content. The indentation
				463	// preceding the prefix is not included.
				464	// For example, if the line is:
				465	// // content
				466	// then the original prefix is "// ".
				467	SmallVector<StringRef, 16> OriginalPrefix;
				468
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	469	// Prefix[i] contains the intended leading "//" with trailing spaces to
				470	// account for the indentation of content within the comment at line i after
				471	// formatting. It can be different than the original prefix when the original
				472	// line starts like this:
				473	// //content
				474	// Then the original prefix is "//", but the prefix is "// ".
				475	SmallVector<StringRef, 16> Prefix;
				476
				477	SmallVector<unsigned, 16> OriginalContentColumn;
				478
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	479	/// The token to which the last line of this breakable token belongs
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	480	/// to; nullptr if that token is the initial token.
				481	///
				482	/// The distinction is because if the token of the last line of this breakable
				483	/// token is distinct from the initial token, this breakable token owns the
				484	/// whitespace before the token of the last line, and the whitespace manager
				485	/// must be able to modify it.
				486	FormatToken *LastLineTok = nullptr;
				487	};
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	488	} // namespace format
				489	} // namespace clang
				490
Benjamin Kramer	2f5db8b	2014-08-13 16:25:19 +0000	[diff] [blame]	491	#endif