//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file declares an indenter that manages the indentation of
/// continuations.
///
//===----------------------------------------------------------------------===//
|  |  | 
|  | #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H | 
|  | #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H | 
|  |  | 
|  | #include "Encoding.h" | 
|  | #include "FormatToken.h" | 
|  | #include "clang/Format/Format.h" | 
|  | #include "llvm/Support/Regex.h" | 
|  | #include <map> | 
|  | #include <tuple> | 
|  |  | 
|  | namespace clang { | 
|  | class SourceManager; | 
|  |  | 
|  | namespace format { | 
|  |  | 
|  | class AnnotatedLine; | 
|  | class BreakableToken; | 
|  | struct FormatToken; | 
|  | struct LineState; | 
|  | struct ParenState; | 
|  | struct RawStringFormatStyleManager; | 
|  | class WhitespaceManager; | 
|  |  | 
/// Lookup tables mapping strings to the \c FormatStyle associated with them.
///
/// Two independent tables are kept: one keyed by a delimiter string and one
/// keyed by an enclosing-function string.
///
/// NOTE(review): judging by the name, these styles apply to the contents of
/// raw string literals, and both tables are presumably populated from
/// \c CodeStyle by the constructor. The definitions are not in this header --
/// confirm against the implementation file.
struct RawStringFormatStyleManager {
  /// Styles keyed by delimiter string.
  llvm::StringMap<FormatStyle> DelimiterStyle;
  /// Styles keyed by enclosing-function string.
  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;

  /// Constructs the manager from \p CodeStyle.
  RawStringFormatStyleManager(const FormatStyle &CodeStyle);

  /// Returns the style associated with \p Delimiter, if there is one.
  ///
  /// NOTE(review): presumably a lookup in \c DelimiterStyle that yields an
  /// empty optional on a miss -- confirm against the definition.
  llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;

  /// Returns the style associated with \p EnclosingFunction, if there is one.
  ///
  /// NOTE(review): presumably a lookup in \c EnclosingFunctionStyle that
  /// yields an empty optional on a miss -- confirm against the definition.
  llvm::Optional<FormatStyle>
  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
};
|  |  | 
/// Places the tokens of a line one by one, tracking the indentation of
/// continuations in a \c LineState.
///
/// The public interface creates the initial state for a line, answers whether
/// a break after a given state is allowed or mandatory, and appends the next
/// token to a state either on the current line or on a new line.
class ContinuationIndenter {
public:
  /// Constructs a \c ContinuationIndenter.
  ///
  /// The line to format and its first indent are not constructor arguments;
  /// they are supplied per line through \c getInitialState.
  ///
  /// NOTE(review): the arguments presumably initialize the same-named data
  /// members declared at the end of this class -- confirm against the
  /// definition.
  ContinuationIndenter(const FormatStyle &Style,
                       const AdditionalKeywords &Keywords,
                       const SourceManager &SourceMgr,
                       WhitespaceManager &Whitespaces,
                       encoding::Encoding Encoding,
                       bool BinPackInconclusiveFunctions);

  /// Get the initial state, i.e. the state after placing \p Line's
  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
  /// the case of formatting inside raw string literals, \p FirstStartColumn is
  /// the column at which the state of the parent formatter is.
  ///
  /// NOTE(review): \p DryRun presumably suppresses the creation of
  /// replacements, as documented for \c addTokenToState -- confirm.
  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
                            const AnnotatedLine *Line, bool DryRun);

  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
  // better home.
  /// Returns \c true, if a line break after \p State is allowed.
  bool canBreak(const LineState &State);

  /// Returns \c true, if a line break after \p State is mandatory.
  bool mustBreak(const LineState &State);

  /// Appends the next token to \p State and updates information
  /// necessary for indentation.
  ///
  /// Puts the token on the current line if \p Newline is \c false and adds a
  /// line break and necessary indentation otherwise.
  ///
  /// If \p DryRun is \c false, also creates and stores the required
  /// \c Replacement.
  ///
  /// NOTE(review): the meaning of the returned value is not documented here;
  /// presumably an extra penalty -- confirm against the definition.
  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
                           unsigned ExtraSpaces = 0);

  /// Get the column limit for this line. This is the style's column
  /// limit, potentially reduced for preprocessor definitions.
  unsigned getColumnLimit(const LineState &State) const;

private:
  /// Mark the next token as consumed in \p State and modify its stacks
  /// accordingly.
  ///
  /// NOTE(review): the meaning of the returned value is not documented here;
  /// presumably an extra penalty -- confirm against the definition.
  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);

  /// Update 'State' according to the next token's fake left parentheses.
  void moveStatePastFakeLParens(LineState &State, bool Newline);
  /// Update 'State' according to the next token's fake r_parens.
  void moveStatePastFakeRParens(LineState &State);

  /// Update 'State' according to the next token being one of "(<{[".
  void moveStatePastScopeOpener(LineState &State, bool Newline);
  /// Update 'State' according to the next token being one of ")>}]".
  void moveStatePastScopeCloser(LineState &State);
  /// Update 'State' with the next token opening a nested block.
  void moveStateToNewBlock(LineState &State);

  /// Reformats a raw string literal.
  ///
  /// \p Current is the raw string token and \p RawStringStyle the style to
  /// reformat it with.
  ///
  /// \returns An extra penalty induced by reformatting the token.
  unsigned reformatRawStringLiteral(const FormatToken &Current,
                                    LineState &State,
                                    const FormatStyle &RawStringStyle,
                                    bool DryRun);

  /// If the current token is at the end of the current line, handle
  /// the transition to the next line.
  ///
  /// NOTE(review): the meaning of the returned value is not documented here;
  /// presumably an extra penalty -- confirm against the definition.
  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
                           bool DryRun, bool AllowBreak);

  /// If \p Current is a raw string that is configured to be reformatted,
  /// return the style to be used.
  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
                                                const LineState &State);

  /// If the current token sticks out over the end of the line, break
  /// it if possible.
  ///
  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
  /// when tokens are broken or lines exceed the column limit, and exceeded
  /// indicates whether the algorithm purposefully left lines exceeding the
  /// column limit.
  ///
  /// The returned penalty will cover the cost of the additional line breaks
  /// and column limit violation in all lines except for the last one. The
  /// penalty for the column limit violation in the last line (and in single
  /// line tokens) is handled in \c addNextStateToQueue.
  ///
  /// \p Strict indicates whether reflowing is allowed to leave characters
  /// protruding the column limit; if true, lines will be split strictly within
  /// the column limit where possible; if false, words are allowed to protrude
  /// over the column limit as long as the penalty is less than the penalty
  /// of a break.
  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
                                                 LineState &State,
                                                 bool AllowBreak, bool DryRun,
                                                 bool Strict);

  /// Returns the \c BreakableToken starting at \p Current, or nullptr
  /// if the current token cannot be broken.
  std::unique_ptr<BreakableToken>
  createBreakableToken(const FormatToken &Current, LineState &State,
                       bool AllowBreak);

  /// Appends the next token to \p State and updates information
  /// necessary for indentation.
  ///
  /// Puts the token on the current line.
  ///
  /// If \p DryRun is \c false, also creates and stores the required
  /// \c Replacement.
  void addTokenOnCurrentLine(LineState &State, bool DryRun,
                             unsigned ExtraSpaces);

  /// Appends the next token to \p State and updates information
  /// necessary for indentation.
  ///
  /// Adds a line break and necessary indentation.
  ///
  /// If \p DryRun is \c false, also creates and stores the required
  /// \c Replacement.
  ///
  /// NOTE(review): the meaning of the returned value is not documented here;
  /// presumably an extra penalty -- confirm against the definition.
  unsigned addTokenOnNewLine(LineState &State, bool DryRun);

  /// Calculate the new column for a line wrap before the next token.
  unsigned getNewLineColumn(const LineState &State);

  /// Adds a multiline token to the \p State.
  ///
  /// \returns Extra penalty for the first line of the literal: last line is
  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
  /// matter, as we don't change them.
  unsigned addMultilineToken(const FormatToken &Current, LineState &State);

  /// Returns \c true if the next token starts a multiline string
  /// literal.
  ///
  /// This includes implicitly concatenated strings, strings that will be broken
  /// by clang-format and string literals with escaped newlines.
  bool nextIsMultilineString(const LineState &State);

  /// The formatting style; held by value, so this object owns its own copy.
  FormatStyle Style;
  /// Reference member: the referenced object must outlive this indenter.
  const AdditionalKeywords &Keywords;
  /// Reference member: the referenced object must outlive this indenter.
  const SourceManager &SourceMgr;
  /// Reference member (non-const): the referenced object must outlive this
  /// indenter.
  WhitespaceManager &Whitespaces;
  /// The text encoding, held by value.
  encoding::Encoding Encoding;
  /// NOTE(review): presumably controls bin packing for functions where it
  /// could not be decided conclusively -- confirm against the implementation.
  bool BinPackInconclusiveFunctions;
  /// NOTE(review): presumably matches comments that are pragmas and must not
  /// be altered -- confirm against the implementation.
  llvm::Regex CommentPragmasRegex;
  /// Style lookup tables, held by value and immutable after construction.
  const RawStringFormatStyleManager RawStringFormats;
};
|  |  | 
|  | struct ParenState { | 
|  | ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, | 
|  | bool AvoidBinPacking, bool NoLineBreak) | 
|  | : Tok(Tok), Indent(Indent), LastSpace(LastSpace), | 
|  | NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), | 
|  | AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), | 
|  | NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), | 
|  | LastOperatorWrapped(true), ContainsLineBreak(false), | 
|  | ContainsUnwrappedBuilder(false), AlignColons(true), | 
|  | ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), | 
|  | NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {} | 
|  |  | 
|  | /// \brief The token opening this parenthesis level, or nullptr if this level | 
|  | /// is opened by fake parenthesis. | 
|  | /// | 
|  | /// Not considered for memoization as it will always have the same value at | 
|  | /// the same token. | 
|  | const FormatToken *Tok; | 
|  |  | 
|  | /// The position to which a specific parenthesis level needs to be | 
|  | /// indented. | 
|  | unsigned Indent; | 
|  |  | 
|  | /// The position of the last space on each level. | 
|  | /// | 
|  | /// Used e.g. to break like: | 
|  | /// functionCall(Parameter, otherCall( | 
|  | ///                             OtherParameter)); | 
|  | unsigned LastSpace; | 
|  |  | 
|  | /// If a block relative to this parenthesis level gets wrapped, indent | 
|  | /// it this much. | 
|  | unsigned NestedBlockIndent; | 
|  |  | 
|  | /// The position the first "<<" operator encountered on each level. | 
|  | /// | 
|  | /// Used to align "<<" operators. 0 if no such operator has been encountered | 
|  | /// on a level. | 
|  | unsigned FirstLessLess = 0; | 
|  |  | 
|  | /// The column of a \c ? in a conditional expression; | 
|  | unsigned QuestionColumn = 0; | 
|  |  | 
|  | /// The position of the colon in an ObjC method declaration/call. | 
|  | unsigned ColonPos = 0; | 
|  |  | 
|  | /// The start of the most recent function in a builder-type call. | 
|  | unsigned StartOfFunctionCall = 0; | 
|  |  | 
|  | /// Contains the start of array subscript expressions, so that they | 
|  | /// can be aligned. | 
|  | unsigned StartOfArraySubscripts = 0; | 
|  |  | 
|  | /// If a nested name specifier was broken over multiple lines, this | 
|  | /// contains the start column of the second line. Otherwise 0. | 
|  | unsigned NestedNameSpecifierContinuation = 0; | 
|  |  | 
|  | /// If a call expression was broken over multiple lines, this | 
|  | /// contains the start column of the second line. Otherwise 0. | 
|  | unsigned CallContinuation = 0; | 
|  |  | 
|  | /// The column of the first variable name in a variable declaration. | 
|  | /// | 
|  | /// Used to align further variables if necessary. | 
|  | unsigned VariablePos = 0; | 
|  |  | 
|  | /// Whether a newline needs to be inserted before the block's closing | 
|  | /// brace. | 
|  | /// | 
|  | /// We only want to insert a newline before the closing brace if there also | 
|  | /// was a newline after the beginning left brace. | 
|  | bool BreakBeforeClosingBrace : 1; | 
|  |  | 
|  | /// Avoid bin packing, i.e. multiple parameters/elements on multiple | 
|  | /// lines, in this context. | 
|  | bool AvoidBinPacking : 1; | 
|  |  | 
|  | /// Break after the next comma (or all the commas in this context if | 
|  | /// \c AvoidBinPacking is \c true). | 
|  | bool BreakBeforeParameter : 1; | 
|  |  | 
|  | /// Line breaking in this context would break a formatting rule. | 
|  | bool NoLineBreak : 1; | 
|  |  | 
|  | /// Same as \c NoLineBreak, but is restricted until the end of the | 
|  | /// operand (including the next ","). | 
|  | bool NoLineBreakInOperand : 1; | 
|  |  | 
|  | /// True if the last binary operator on this level was wrapped to the | 
|  | /// next line. | 
|  | bool LastOperatorWrapped : 1; | 
|  |  | 
|  | /// \c true if this \c ParenState already contains a line-break. | 
|  | /// | 
|  | /// The first line break in a certain \c ParenState causes extra penalty so | 
|  | /// that clang-format prefers similar breaks, i.e. breaks in the same | 
|  | /// parenthesis. | 
|  | bool ContainsLineBreak : 1; | 
|  |  | 
|  | /// \c true if this \c ParenState contains multiple segments of a | 
|  | /// builder-type call on one line. | 
|  | bool ContainsUnwrappedBuilder : 1; | 
|  |  | 
|  | /// \c true if the colons of the curren ObjC method expression should | 
|  | /// be aligned. | 
|  | /// | 
|  | /// Not considered for memoization as it will always have the same value at | 
|  | /// the same token. | 
|  | bool AlignColons : 1; | 
|  |  | 
|  | /// \c true if at least one selector name was found in the current | 
|  | /// ObjC method expression. | 
|  | /// | 
|  | /// Not considered for memoization as it will always have the same value at | 
|  | /// the same token. | 
|  | bool ObjCSelectorNameFound : 1; | 
|  |  | 
|  | /// \c true if there are multiple nested blocks inside these parens. | 
|  | /// | 
|  | /// Not considered for memoization as it will always have the same value at | 
|  | /// the same token. | 
|  | bool HasMultipleNestedBlocks : 1; | 
|  |  | 
|  | /// The start of a nested block (e.g. lambda introducer in C++ or | 
|  | /// "function" in JavaScript) is not wrapped to a new line. | 
|  | bool NestedBlockInlined : 1; | 
|  |  | 
|  | /// \c true if the current \c ParenState represents an Objective-C | 
|  | /// array literal. | 
|  | bool IsInsideObjCArrayLiteral : 1; | 
|  |  | 
|  | bool operator<(const ParenState &Other) const { | 
|  | if (Indent != Other.Indent) | 
|  | return Indent < Other.Indent; | 
|  | if (LastSpace != Other.LastSpace) | 
|  | return LastSpace < Other.LastSpace; | 
|  | if (NestedBlockIndent != Other.NestedBlockIndent) | 
|  | return NestedBlockIndent < Other.NestedBlockIndent; | 
|  | if (FirstLessLess != Other.FirstLessLess) | 
|  | return FirstLessLess < Other.FirstLessLess; | 
|  | if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) | 
|  | return BreakBeforeClosingBrace; | 
|  | if (QuestionColumn != Other.QuestionColumn) | 
|  | return QuestionColumn < Other.QuestionColumn; | 
|  | if (AvoidBinPacking != Other.AvoidBinPacking) | 
|  | return AvoidBinPacking; | 
|  | if (BreakBeforeParameter != Other.BreakBeforeParameter) | 
|  | return BreakBeforeParameter; | 
|  | if (NoLineBreak != Other.NoLineBreak) | 
|  | return NoLineBreak; | 
|  | if (LastOperatorWrapped != Other.LastOperatorWrapped) | 
|  | return LastOperatorWrapped; | 
|  | if (ColonPos != Other.ColonPos) | 
|  | return ColonPos < Other.ColonPos; | 
|  | if (StartOfFunctionCall != Other.StartOfFunctionCall) | 
|  | return StartOfFunctionCall < Other.StartOfFunctionCall; | 
|  | if (StartOfArraySubscripts != Other.StartOfArraySubscripts) | 
|  | return StartOfArraySubscripts < Other.StartOfArraySubscripts; | 
|  | if (CallContinuation != Other.CallContinuation) | 
|  | return CallContinuation < Other.CallContinuation; | 
|  | if (VariablePos != Other.VariablePos) | 
|  | return VariablePos < Other.VariablePos; | 
|  | if (ContainsLineBreak != Other.ContainsLineBreak) | 
|  | return ContainsLineBreak; | 
|  | if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) | 
|  | return ContainsUnwrappedBuilder; | 
|  | if (NestedBlockInlined != Other.NestedBlockInlined) | 
|  | return NestedBlockInlined; | 
|  | return false; | 
|  | } | 
|  | }; | 
|  |  | 
|  | /// The current state when indenting a unwrapped line. | 
|  | /// | 
|  | /// As the indenting tries different combinations this is copied by value. | 
|  | struct LineState { | 
|  | /// The number of used columns in the current line. | 
|  | unsigned Column; | 
|  |  | 
|  | /// The token that needs to be next formatted. | 
|  | FormatToken *NextToken; | 
|  |  | 
|  | /// \c true if this line contains a continued for-loop section. | 
|  | bool LineContainsContinuedForLoopSection; | 
|  |  | 
|  | /// \c true if \p NextToken should not continue this line. | 
|  | bool NoContinuation; | 
|  |  | 
|  | /// The \c NestingLevel at the start of this line. | 
|  | unsigned StartOfLineLevel; | 
|  |  | 
|  | /// The lowest \c NestingLevel on the current line. | 
|  | unsigned LowestLevelOnLine; | 
|  |  | 
|  | /// The start column of the string literal, if we're in a string | 
|  | /// literal sequence, 0 otherwise. | 
|  | unsigned StartOfStringLiteral; | 
|  |  | 
|  | /// A stack keeping track of properties applying to parenthesis | 
|  | /// levels. | 
|  | std::vector<ParenState> Stack; | 
|  |  | 
|  | /// Ignore the stack of \c ParenStates for state comparison. | 
|  | /// | 
|  | /// In long and deeply nested unwrapped lines, the current algorithm can | 
|  | /// be insufficient for finding the best formatting with a reasonable amount | 
|  | /// of time and memory. Setting this flag will effectively lead to the | 
|  | /// algorithm not analyzing some combinations. However, these combinations | 
|  | /// rarely contain the optimal solution: In short, accepting a higher | 
|  | /// penalty early would need to lead to different values in the \c | 
|  | /// ParenState stack (in an otherwise identical state) and these different | 
|  | /// values would need to lead to a significant amount of avoided penalty | 
|  | /// later. | 
|  | /// | 
|  | /// FIXME: Come up with a better algorithm instead. | 
|  | bool IgnoreStackForComparison; | 
|  |  | 
|  | /// The indent of the first token. | 
|  | unsigned FirstIndent; | 
|  |  | 
|  | /// The line that is being formatted. | 
|  | /// | 
|  | /// Does not need to be considered for memoization because it doesn't change. | 
|  | const AnnotatedLine *Line; | 
|  |  | 
|  | /// Comparison operator to be able to used \c LineState in \c map. | 
|  | bool operator<(const LineState &Other) const { | 
|  | if (NextToken != Other.NextToken) | 
|  | return NextToken < Other.NextToken; | 
|  | if (Column != Other.Column) | 
|  | return Column < Other.Column; | 
|  | if (LineContainsContinuedForLoopSection != | 
|  | Other.LineContainsContinuedForLoopSection) | 
|  | return LineContainsContinuedForLoopSection; | 
|  | if (NoContinuation != Other.NoContinuation) | 
|  | return NoContinuation; | 
|  | if (StartOfLineLevel != Other.StartOfLineLevel) | 
|  | return StartOfLineLevel < Other.StartOfLineLevel; | 
|  | if (LowestLevelOnLine != Other.LowestLevelOnLine) | 
|  | return LowestLevelOnLine < Other.LowestLevelOnLine; | 
|  | if (StartOfStringLiteral != Other.StartOfStringLiteral) | 
|  | return StartOfStringLiteral < Other.StartOfStringLiteral; | 
|  | if (IgnoreStackForComparison || Other.IgnoreStackForComparison) | 
|  | return false; | 
|  | return Stack < Other.Stack; | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // end namespace format | 
|  | } // end namespace clang | 
|  |  | 
|  | #endif |