Blame - clang/lib/Format/BreakableToken.h - toolchain/llvm-project

blob: 86cb4b71e742a4d7cc638fe314868290c665d70e [file] [log] [blame]

Erik Pilkington	7adcf29	2018-07-24 00:07:49 +0000	[diff] [blame]	1	//===--- BreakableToken.h - Format C++ code ---------------------*- C++ -*-===//
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame^]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	///
				9	/// \file
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	10	/// Declares BreakableToken, BreakableStringLiteral, BreakableComment,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	11	/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
				12	/// token type-specific logic to break long lines in tokens and reflow content
				13	/// between tokens.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	14	///
				15	//===----------------------------------------------------------------------===//
				16
Benjamin Kramer	2f5db8b	2014-08-13 16:25:19 +0000	[diff] [blame]	17	#ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
				18	#define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	19
Alexander Kornienko	ffcc010	2013-06-05 14:09:10 +0000	[diff] [blame]	20	#include "Encoding.h"
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	21	#include "TokenAnnotator.h"
				22	#include "WhitespaceManager.h"
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	23	#include "llvm/ADT/StringSet.h"
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	24	#include "llvm/Support/Regex.h"
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	25	#include <utility>
				26
				27	namespace clang {
				28	namespace format {
				29
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	30	/// Checks if \p Token switches formatting, like /* clang-format off */.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	31	/// \p Token must be a comment. \returns true if it switches formatting.
				32	bool switchesFormatting(const FormatToken &Token);
				33
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	34	struct FormatStyle;
				35
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	36	/// Base class for tokens / ranges of tokens that can allow breaking
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	37	/// within the tokens - for example, to avoid whitespace beyond the column
				38	/// limit, or to reflow text.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	39	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	40	/// Generally, a breakable token consists of logical lines, addressed by a line
				41	/// index. For example, in a sequence of line comments, each line comment is its
				42	/// own logical line; similarly, for a block comment, each line in the block
				43	/// comment is on its own logical line.
				44	///
				45	/// There are two methods to compute the layout of the token:
				46	/// - getRangeLength measures the number of columns needed for a range of text
				47	/// within a logical line, and
				48	/// - getContentStartColumn returns the start column at which we want the
				49	/// content of a logical line to start (potentially after introducing a line
				50	/// break).
				51	///
				52	/// The mechanism to adapt the layout of the breakable token is organised
				53	/// around the concept of a \c Split, which is a whitespace range that signifies
				54	/// a position of the content of a token where a reformatting might be done.
				55	///
				56	/// Operating with splits is divided into two operations:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	57	/// - getSplit, for finding a split starting at a position,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	58	/// - insertBreak, for executing the split using a whitespace manager.
				59	///
				60	/// There is a pair of operations that are used to compress a long whitespace
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	61	/// range with a single space if that will bring the line length under the
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	62	/// column limit:
				63	/// - getLengthAfterCompression, for calculating the size in columns of the
				64	/// line after a whitespace range has been compressed, and
				65	/// - compressWhitespace, for executing the whitespace compression using a
				66	/// whitespace manager; note that the compressed whitespace may be in the
				67	/// middle of the original line and of the reformatted line.
				68	///
				69	/// For tokens where the whitespace before each line needs to be also
				70	/// reformatted, for example for tokens supporting reflow, there are analogous
				71	/// operations that might be executed before the main line breaking occurs:
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	72	/// - getReflowSplit, for finding a split such that the content preceding it
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	73	/// needs to be specially reflown,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	74	/// - reflow, for executing the split using a whitespace manager,
Krasimir Georgiev	35599fd	2017-10-16 09:08:53 +0000	[diff] [blame]	75	/// - introducesBreakBeforeToken, for checking if reformatting the beginning
				76	/// of the content introduces a line break before it,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	77	/// - adaptStartOfLine, for executing the reflow using a whitespace
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	78	/// manager.
				79	///
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	80	/// For tokens that require the whitespace after the last line to be
				81	/// reformatted, for example in multiline jsdoc comments that require the
				82	/// trailing '*/' to be on a line of itself, there are analogous operations
				83	/// that might be executed after the last line has been reformatted:
				84	/// - getSplitAfterLastLine, for finding a split after the last line that needs
				85	/// to be reflown,
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	86	/// - replaceWhitespaceAfterLastLine, for executing the reflow using a
				87	/// whitespace manager.
				88	///
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	89	class BreakableToken {
				90	public:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	91	/// Contains starting character index and length of split.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	92	typedef std::pair<StringRef::size_type, unsigned> Split;
				93
Angel Garcia Gomez	637d1e6	2015-10-20 13:23:58 +0000	[diff] [blame]	94	virtual ~BreakableToken() {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	95
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	96	/// Returns the number of lines in this token in the original code.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	97	virtual unsigned getLineCount() const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	98
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	99	/// Returns the number of columns required to format the text in the
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	100	/// byte range [\p Offset, \p Offset \c + \p Length).
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	101	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	102	/// \p Offset is the byte offset from the start of the content of the line
				103	/// at \p LineIndex.
				104	///
				105	/// \p StartColumn is the column at which the text starts in the formatted
				106	/// file, needed to compute tab stops correctly.
				107	virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				108	StringRef::size_type Length,
				109	unsigned StartColumn) const = 0;
				110
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	111	/// Returns the number of columns required to format the text following
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	112	/// the byte \p Offset in the line \p LineIndex, including potentially
				113	/// unbreakable sequences of tokens following after the end of the token.
				114	///
				115	/// \p Offset is the byte offset from the start of the content of the line
				116	/// at \p LineIndex.
				117	///
				118	/// \p StartColumn is the column at which the text starts in the formatted
				119	/// file, needed to compute tab stops correctly.
				120	///
				121	/// For breakable tokens that never use extra space at the end of a line, this
				122	/// is equivalent to getRangeLength with a Length of StringRef::npos.
				123	virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				124	unsigned StartColumn) const {
				125	return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
				126	}
				127
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	128	/// Returns the column at which content in line \p LineIndex starts,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	129	/// assuming no reflow.
				130	///
				131	/// If \p Break is true, returns the column at which the line should start
				132	/// after the line break.
				133	/// If \p Break is false, returns the column at which the line itself will
				134	/// start.
				135	virtual unsigned getContentStartColumn(unsigned LineIndex,
				136	bool Break) const = 0;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	137
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	138	/// Returns additional content indent required for the second line after the
				139	/// content at line \p LineIndex is broken.
				140	///
Krasimir Georgiev	e3424bf	2018-07-30 12:22:41 +0000	[diff] [blame]	141	// (Next lines do not start with `///` since otherwise -Wdocumentation picks
				142	// up the example annotations and generates warnings for them)
				143	// For example, Javadoc @param annotations require an indent of 4 spaces and
				144	// in this example getContentIndent(1) returns 4.
				145	//   /**
				146	//    * @param loooooooooooooong line
				147	//    *     continuation
				148	//    */
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	149	virtual unsigned getContentIndent(unsigned LineIndex) const {
				150	return 0;
				151	}
				152
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	153	/// Returns a range (offset, length) at which to break the line at
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	154	/// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	155	/// violate \p ColumnLimit, assuming the text starting at \p TailOffset in
				156	/// the token is formatted starting at ContentStartColumn in the reformatted
				157	/// file.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	158	virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	159	unsigned ColumnLimit, unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	160	llvm::Regex &CommentPragmasRegex) const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	161
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	162	/// Emits the previously retrieved \p Split via \p Whitespaces.
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	163	virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	164	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	165	WhitespaceManager &Whitespaces) const = 0;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	166
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	167	/// Returns the number of columns needed to format
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	168	/// \p RemainingTokenColumns, assuming that Split is within the range measured
				169	/// by \p RemainingTokenColumns, and that the whitespace in Split is reduced
				170	/// to a single space.
				171	unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,
				172	Split Split) const;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	173
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	174	/// Replaces the whitespace range described by \p Split with a single
Alexander Kornienko	875395f	2013-11-12 17:50:13 +0000	[diff] [blame]	175	/// space.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	176	virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
				177	Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	178	WhitespaceManager &Whitespaces) const = 0;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	179
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	180	/// Returns whether the token supports reflowing text.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	181	virtual bool supportsReflow() const { return false; }
				182
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	183	/// Returns a whitespace range (offset, length) of the content at \p
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	184	/// LineIndex such that the content of that line is reflown to the end of the
				185	/// previous one.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	186	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	187	/// Returning (StringRef::npos, 0) indicates reflowing is not possible.
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	188	///
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	189	/// The range will include any whitespace preceding the specified line's
				190	/// content.
				191	///
				192	/// If the split is not contained within one token, for example when reflowing
				193	/// line comments, returns (0, <length>).
				194	virtual Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	a7a24bf	2017-03-08 08:58:44 +0000	[diff] [blame]	195	llvm::Regex &CommentPragmasRegex) const {
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	196	return Split(StringRef::npos, 0);
				197	}
				198
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	199	/// Reflows the current line into the end of the previous one.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	200	virtual void reflow(unsigned LineIndex,
				201	WhitespaceManager &Whitespaces) const {}
				202
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	203	/// Returns whether there will be a line break at the start of the
Manuel Klimek	7786614	2017-11-17 11:17:15 +0000	[diff] [blame]	204	/// token.
				205	virtual bool introducesBreakBeforeToken() const {
Krasimir Georgiev	35599fd	2017-10-16 09:08:53 +0000	[diff] [blame]	206	return false;
				207	}
				208
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	209	/// Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	210	virtual void adaptStartOfLine(unsigned LineIndex,
				211	WhitespaceManager &Whitespaces) const {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	212
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	213	/// Returns a whitespace range (offset, length) of the content at
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	214	/// the last line that needs to be reformatted after the last line has been
				215	/// reformatted.
				216	///
				217	/// A result having offset == StringRef::npos means that no reformat is
				218	/// necessary.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	219	virtual Split getSplitAfterLastLine(unsigned TailOffset) const {
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	220	return Split(StringRef::npos, 0);
				221	}
				222
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	223	/// Replaces the whitespace from \p SplitAfterLastLine on the last line
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	224	/// after the last line has been formatted by performing a reformatting.
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	225	void replaceWhitespaceAfterLastLine(unsigned TailOffset,
				226	Split SplitAfterLastLine,
				227	WhitespaceManager &Whitespaces) const {
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	228	insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	229	/*ContentIndent=*/0, Whitespaces);
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	230	}
				231
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	232	/// Updates the next token of \p State to the next token after this
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	233	/// one. This can be used when this token manages a set of underlying tokens
				234	/// as a unit and is responsible for the formatting of them.
				235	virtual void updateNextToken(LineState &State) const {}
				236
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	237	protected:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	238	BreakableToken(const FormatToken &Tok, bool InPPDirective,
				239	encoding::Encoding Encoding, const FormatStyle &Style)
				240	: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
				241	Style(Style) {}
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	242
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	243	const FormatToken &Tok;
Alexander Kornienko	be63390	2013-06-14 11:46:10 +0000	[diff] [blame]	244	const bool InPPDirective;
				245	const encoding::Encoding Encoding;
Alexander Kornienko	ebb43ca	2013-09-05 14:08:34 +0000	[diff] [blame]	246	const FormatStyle &Style;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	247	};
				248
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	249	class BreakableStringLiteral : public BreakableToken {
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	250	public:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	251	/// Creates a breakable token for a single-line string literal.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	252	///
				253	/// \p StartColumn specifies the column in which the token will start
				254	/// after formatting.
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	255	BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
				256	StringRef Prefix, StringRef Postfix,
Krasimir Georgiev	55c23a1	2018-01-23 11:26:19 +0000	[diff] [blame]	257	unsigned UnbreakableTailLength, bool InPPDirective,
				258	encoding::Encoding Encoding, const FormatStyle &Style);
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	259
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	260	Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
Krasimir Georgiev	6bbc706	2018-04-23 10:02:59 +0000	[diff] [blame]	261	unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	262	llvm::Regex &CommentPragmasRegex) const override;
Craig Topper	fb6b25b	2014-03-15 04:29:04 +0000	[diff] [blame]	263	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	264	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	265	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	266	void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	267	WhitespaceManager &Whitespaces) const override {} // Intentionally a no-op for string literals.
				268	unsigned getLineCount() const override;
				269	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				270	StringRef::size_type Length,
				271	unsigned StartColumn) const override;
				272	unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				273	unsigned StartColumn) const override;
				274	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
				275
				276	protected:
				277	// The column in which the token starts (see the constructor's StartColumn).
				278	unsigned StartColumn;
				279	// The prefix a line needs after a break in the token.
				280	StringRef Prefix;
				281	// The postfix a line needs before introducing a break.
				282	StringRef Postfix;
				283	// The token text excluding the prefix and postfix.
				284	StringRef Line;
				285	// Length of the sequence of tokens after this string literal that cannot
				286	// contain line breaks.
				287	unsigned UnbreakableTailLength;
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	288	};
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	289
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	290	class BreakableComment : public BreakableToken {
				291	protected:
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	292	/// Creates a breakable token for a comment.
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	293	///
Krasimir Georgiev	4b15922	2017-02-21 10:54:50 +0000	[diff] [blame]	294	/// \p StartColumn specifies the column in which the comment will start after
				295	/// formatting.
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	296	BreakableComment(const FormatToken &Token, unsigned StartColumn,
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	297	bool InPPDirective, encoding::Encoding Encoding,
				298	const FormatStyle &Style);
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	299
				300	public:
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	301	bool supportsReflow() const override { return true; }
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	302	unsigned getLineCount() const override;
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	303	Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
Krasimir Georgiev	6bbc706	2018-04-23 10:02:59 +0000	[diff] [blame]	304	unsigned ContentStartColumn,
Krasimir Georgiev	17725d8	2017-03-08 08:55:12 +0000	[diff] [blame]	305	llvm::Regex &CommentPragmasRegex) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	306	void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	307	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	308
				309	protected:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	310	// Returns the token containing the line at LineIndex.
				311	const FormatToken &tokenAt(unsigned LineIndex) const;
				312
				313	// Checks if the content of line LineIndex may be reflown with the previous
				314	// line.
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	315	virtual bool mayReflow(unsigned LineIndex,
				316	llvm::Regex &CommentPragmasRegex) const = 0;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	317
				318	// Contains the original text of the lines of the comment.
				319	//
				320	// In case of block comments, excludes the leading /* in the first line and
				321	// trailing */ in the last line. In case of line comments, excludes the
				322	// leading // and spaces.
				323	SmallVector<StringRef, 16> Lines;
				324
				325	// Contains the text of the lines excluding all leading and trailing
				326	// whitespace between the lines. Note that the decoration (if present) is also
				327	// not considered part of the text.
				328	SmallVector<StringRef, 16> Content;
				329
				330	// Tokens[i] contains a reference to the token containing Lines[i] if the
				331	// whitespace range before that token is managed by this block.
				332	// Otherwise, Tokens[i] is a null pointer.
				333	SmallVector<FormatToken *, 16> Tokens;
				334
				335	// ContentColumn[i] is the target column at which Content[i] should be.
				336	// Note that this excludes a leading "* " or "*" in case of block comments
				337	// where all lines have a "*" prefix, or the leading "// " or "//" in case of
				338	// line comments.
				339	//
				340	// In block comments, the first line's target column is always positive. The
				341	// remaining lines' target columns are relative to the first line to allow
				342	// correct indentation of comments in \c WhitespaceManager. Thus they can be
				343	// negative as well (in case the first line needs to be unindented more than
				344	// there's actual whitespace in another line).
				345	SmallVector<int, 16> ContentColumn;
				346
				347	// The intended start column of the first line of text from this section.
				348	unsigned StartColumn;
				349
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	350	// The prefix to use in front of a line that has been reflown up.
				351	// For example, when reflowing the second line after the first here:
				352	// // comment 1
				353	// // comment 2
				354	// we expect:
				355	// // comment 1 comment 2
				356	// and not:
				357	// // comment 1comment 2
				358	StringRef ReflowPrefix = " ";
				359	};
				360
				361	class BreakableBlockComment : public BreakableComment {
				362	public:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	363	BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
				364	unsigned OriginalStartColumn, bool FirstInLine,
				365	bool InPPDirective, encoding::Encoding Encoding,
				366	const FormatStyle &Style);
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	367
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	368	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				369	StringRef::size_type Length,
				370	unsigned StartColumn) const override;
				371	unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
				372	unsigned StartColumn) const override;
				373	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	374	unsigned getContentIndent(unsigned LineIndex) const override;
Craig Topper	fb6b25b	2014-03-15 04:29:04 +0000	[diff] [blame]	375	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	376	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	377	WhitespaceManager &Whitespaces) const override;
				378	Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	379	llvm::Regex &CommentPragmasRegex) const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	380	void reflow(unsigned LineIndex,
				381	WhitespaceManager &Whitespaces) const override;
Manuel Klimek	7786614	2017-11-17 11:17:15 +0000	[diff] [blame]	382	bool introducesBreakBeforeToken() const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	383	void adaptStartOfLine(unsigned LineIndex,
				384	WhitespaceManager &Whitespaces) const override;
				385	Split getSplitAfterLastLine(unsigned TailOffset) const override;
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	386
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	387	bool mayReflow(unsigned LineIndex,
				388	llvm::Regex &CommentPragmasRegex) const override;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	389
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	390	// Contains Javadoc annotations that require additional indent when continued
				391	// on multiple lines.
				392	static const llvm::StringSet<> ContentIndentingJavadocAnnotations;
				393
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	394	private:
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	395	// Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	396	//
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	397	// Updates Content[LineIndex-1] and Content[LineIndex] by stripping off
				398	// leading and trailing whitespace.
				399	//
				400	// Sets ContentColumn to the intended column in which the text at
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	401	// Lines[LineIndex] starts (note that the decoration, if present, is not
				402	// considered part of the text).
Alexander Kornienko	ebb43ca	2013-09-05 14:08:34 +0000	[diff] [blame]	403	void adjustWhitespace(unsigned LineIndex, int IndentDelta);
Alexander Kornienko	9e90b62	2013-04-17 17:34:05 +0000	[diff] [blame]	404
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	405	// The column at which the text of a broken line should start.
				406	// Note that an optional decoration would go before that column.
				407	// IndentAtLineBreak is a uniform position for all lines in a block comment,
				408	// regardless of their relative position.
				409	// FIXME: Revisit the decision to do this; the main reason was to support
				410	// patterns like
				411	// /**************//**
				412	//  * Comment
				413	// We could also support such patterns by special casing the first line
				414	// instead.
				415	unsigned IndentAtLineBreak;
				416
Alexander Kornienko	614d96a	2013-07-08 14:12:07 +0000	[diff] [blame]	417	// This is to distinguish between the case when the last line was empty and
				418	// the case when it started with a decoration ("*" or "* ").
				419	bool LastLineNeedsDecoration;
				420
Manuel Klimek	9043c74	2013-05-27 15:23:34 +0000	[diff] [blame]	421	// Either "* " if all lines begin with a "*", or empty.
				422	StringRef Decoration;
Krasimir Georgiev	bb99a36	2017-02-16 12:39:31 +0000	[diff] [blame]	423
				424	// If this block comment has decorations, this is the column of the start of
				425	// the decorations.
				426	unsigned DecorationColumn;
Krasimir Georgiev	22d7e6b	2017-07-20 22:29:39 +0000	[diff] [blame]	427
				428	// If true, make sure that the opening '/*' and the closing '*/' each end up
				429	// on a line of their own. Styles like jsdoc require this for multiline comments.
				430	bool DelimitersOnNewline;
Manuel Klimek	48c930c	2017-12-04 08:53:16 +0000	[diff] [blame]	431
				432	// Length of the sequence of tokens after this block comment that cannot
				433	// contain line breaks.
				434	unsigned UnbreakableTailLength;
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	435	};
				436
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	437	class BreakableLineCommentSection : public BreakableComment {
				438	public:
Daniel Jasper	7d42f3f	2017-01-31 11:25:01 +0000	[diff] [blame]	439	BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn,
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	440	unsigned OriginalStartColumn, bool FirstInLine,
				441	bool InPPDirective, encoding::Encoding Encoding,
				442	const FormatStyle &Style);
				443
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	444	unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
				445	StringRef::size_type Length,
				446	unsigned StartColumn) const override;
				447	unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	448	void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
Krasimir Georgiev	6a5c95b	2018-07-30 08:45:45 +0000	[diff] [blame]	449	unsigned ContentIndent,
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	450	WhitespaceManager &Whitespaces) const override;
				451	Split getReflowSplit(unsigned LineIndex,
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	452	llvm::Regex &CommentPragmasRegex) const override;
Manuel Klimek	93699f4	2017-11-29 14:29:43 +0000	[diff] [blame]	453	void reflow(unsigned LineIndex,
				454	WhitespaceManager &Whitespaces) const override;
				455	void adaptStartOfLine(unsigned LineIndex,
				456	WhitespaceManager &Whitespaces) const override;
Krasimir Georgiev	a7a24bf	2017-03-08 08:58:44 +0000	[diff] [blame]	457	void updateNextToken(LineState &State) const override;
Krasimir Georgiev	00c5c72	2017-02-02 15:32:19 +0000	[diff] [blame]	458	bool mayReflow(unsigned LineIndex,
				459	llvm::Regex &CommentPragmasRegex) const override;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	460
				461	private:
Krasimir Georgiev	2091a3a	2017-02-08 14:45:19 +0000	[diff] [blame]	462	// OriginalPrefix[i] contains the original prefix of line i, including
				463	// trailing whitespace before the start of the content. The indentation
				464	// preceding the prefix is not included.
				465	// For example, if the line is:
				466	// // content
				467	// then the original prefix is "// ".
				468	SmallVector<StringRef, 16> OriginalPrefix;
				469
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	470	// Prefix[i] contains the intended leading "//" with trailing spaces to
				471	// account for the indentation of content within the comment at line i after
				472	// formatting. It can be different than the original prefix when the original
				473	// line starts like this:
				474	// //content
				475	// Then the original prefix is "//", but the prefix is "// ".
				476	SmallVector<StringRef, 16> Prefix;
				477
				478	SmallVector<unsigned, 16> OriginalContentColumn; // NOTE(review): presumably the pre-formatting column of Content[i] - confirm against the implementation.
				479
Adrian Prantl	9fc8faf	2018-05-09 01:00:01 +0000	[diff] [blame]	480	/// The token to which the last line of this breakable token belongs;
Krasimir Georgiev	9183422	2017-01-25 13:58:58 +0000	[diff] [blame]	481	/// nullptr if that token is the initial token.
				482	///
				483	/// The distinction is because if the token of the last line of this breakable
				484	/// token is distinct from the initial token, this breakable token owns the
				485	/// whitespace before the token of the last line, and the whitespace manager
				486	/// must be able to modify it.
				487	FormatToken *LastLineTok = nullptr;
				488	};
Alexander Kornienko	cb45bc1	2013-04-15 14:28:00 +0000	[diff] [blame]	489	} // namespace format
				490	} // namespace clang
				491
Benjamin Kramer	2f5db8b	2014-08-13 16:25:19 +0000	[diff] [blame]	492	#endif