Daniel Jasper | 6b2afe4 | 2013-08-16 11:20:30 +0000 | [diff] [blame] | 1 | //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// \file |
| 11 | /// \brief This file implements an indenter that manages the indentation of |
| 12 | /// continuations. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H |
| 17 | #define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H |
| 18 | |
| 19 | #include "Encoding.h" |
| 20 | #include "clang/Format/Format.h" |
| 21 | |
| 22 | namespace clang { |
| 23 | class SourceManager; |
| 24 | |
| 25 | namespace format { |
| 26 | |
| 27 | class AnnotatedLine; |
| 28 | struct FormatToken; |
| 29 | struct LineState; |
| 30 | struct ParenState; |
| 31 | class WhitespaceManager; |
| 32 | |
| 33 | class ContinuationIndenter { |
| 34 | public: |
| 35 | /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in |
| 36 | /// column \p FirstIndent. |
| 37 | ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr, |
Daniel Jasper | 6b2afe4 | 2013-08-16 11:20:30 +0000 | [diff] [blame] | 38 | WhitespaceManager &Whitespaces, |
| 39 | encoding::Encoding Encoding, |
| 40 | bool BinPackInconclusiveFunctions); |
| 41 | |
Daniel Jasper | 567dcf9 | 2013-09-05 09:29:45 +0000 | [diff] [blame] | 42 | /// \brief Get the initial state, i.e. the state after placing \p Line's |
| 43 | /// first token at \p FirstIndent. |
Daniel Jasper | b77d741 | 2013-09-06 07:54:20 +0000 | [diff] [blame] | 44 | LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, |
| 45 | bool DryRun); |
Daniel Jasper | 6b2afe4 | 2013-08-16 11:20:30 +0000 | [diff] [blame] | 46 | |
| 47 | // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a |
| 48 | // better home. |
| 49 | /// \brief Returns \c true, if a line break after \p State is allowed. |
| 50 | bool canBreak(const LineState &State); |
| 51 | |
| 52 | /// \brief Returns \c true, if a line break after \p State is mandatory. |
| 53 | bool mustBreak(const LineState &State); |
| 54 | |
| 55 | /// \brief Appends the next token to \p State and updates information |
| 56 | /// necessary for indentation. |
| 57 | /// |
| 58 | /// Puts the token on the current line if \p Newline is \c false and adds a |
| 59 | /// line break and necessary indentation otherwise. |
| 60 | /// |
| 61 | /// If \p DryRun is \c false, also creates and stores the required |
| 62 | /// \c Replacement. |
Daniel Jasper | d4a03db | 2013-08-22 15:00:41 +0000 | [diff] [blame] | 63 | unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, |
| 64 | unsigned ExtraSpaces = 0); |
Daniel Jasper | 6b2afe4 | 2013-08-16 11:20:30 +0000 | [diff] [blame] | 65 | |
| 66 | /// \brief Get the column limit for this line. This is the style's column |
| 67 | /// limit, potentially reduced for preprocessor definitions. |
Daniel Jasper | 567dcf9 | 2013-09-05 09:29:45 +0000 | [diff] [blame] | 68 | unsigned getColumnLimit(const LineState &State) const; |
Daniel Jasper | 6b2afe4 | 2013-08-16 11:20:30 +0000 | [diff] [blame] | 69 | |
| 70 | private: |
| 71 | /// \brief Mark the next token as consumed in \p State and modify its stacks |
| 72 | /// accordingly. |
| 73 | unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); |
| 74 | |
| 75 | /// \brief If the current token sticks out over the end of the line, break |
| 76 | /// it if possible. |
| 77 | /// |
| 78 | /// \returns An extra penalty if a token was broken, otherwise 0. |
| 79 | /// |
| 80 | /// The returned penalty will cover the cost of the additional line breaks and |
| 81 | /// column limit violation in all lines except for the last one. The penalty |
| 82 | /// for the column limit violation in the last line (and in single line |
| 83 | /// tokens) is handled in \c addNextStateToQueue. |
| 84 | unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, |
| 85 | bool DryRun); |
| 86 | |
Alexander Kornienko | 6f6154c | 2013-09-10 12:29:48 +0000 | [diff] [blame^] | 87 | /// \brief Adds a multiline token to the \p State. |
Alexander Kornienko | dcc0c5b | 2013-08-29 17:32:57 +0000 | [diff] [blame] | 88 | /// |
| 89 | /// \returns Extra penalty for the first line of the literal: last line is |
| 90 | /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't |
| 91 | /// matter, as we don't change them. |
Alexander Kornienko | 6f6154c | 2013-09-10 12:29:48 +0000 | [diff] [blame^] | 92 | unsigned addMultilineToken(const FormatToken &Current, LineState &State); |
Alexander Kornienko | dcc0c5b | 2013-08-29 17:32:57 +0000 | [diff] [blame] | 93 | |
Daniel Jasper | 4df1ff9 | 2013-08-23 11:57:34 +0000 | [diff] [blame] | 94 | /// \brief Returns \c true if the next token starts a multiline string |
| 95 | /// literal. |
| 96 | /// |
| 97 | /// This includes implicitly concatenated strings, strings that will be broken |
| 98 | /// by clang-format and string literals with escaped newlines. |
| 99 | bool NextIsMultilineString(const LineState &State); |
| 100 | |
Daniel Jasper | 6b2afe4 | 2013-08-16 11:20:30 +0000 | [diff] [blame] | 101 | FormatStyle Style; |
| 102 | SourceManager &SourceMgr; |
Daniel Jasper | 6b2afe4 | 2013-08-16 11:20:30 +0000 | [diff] [blame] | 103 | WhitespaceManager &Whitespaces; |
| 104 | encoding::Encoding Encoding; |
| 105 | bool BinPackInconclusiveFunctions; |
| 106 | }; |
| 107 | |
/// \brief Formatting properties tracked for a single nesting level; a
/// \c LineState keeps a stack of these.
struct ParenState {
  /// \brief Creates a state with the given \p Indent, \p LastSpace,
  /// \p AvoidBinPacking and \p NoLineBreak; every other position starts at 0
  /// and every other flag at \c false.
  ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
             bool NoLineBreak)
      : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
        BreakBeforeClosingBrace(false), QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0),
        StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0),
        CallContinuation(0), VariablePos(0), ContainsLineBreak(false),
        ContainsUnwrappedBuilder(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief The position of the first "<<" operator encountered on each
  /// level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace;

  /// \brief The column of a \c ? in a conditional expression.
  unsigned QuestionColumn;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder;

  /// \brief Strict ordering over all fields, compared member by member in
  /// the sequence below.
  ///
  /// Two states are equivalent (neither is less than the other) only if every
  /// field matches. For the flags that return the flag itself, the state with
  /// the flag set orders first.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    // This field must take part in the ordering as well; otherwise states
    // that differ only here would be treated as equivalent.
    if (NestedNameSpecifierContinuation !=
        Other.NestedNameSpecifierContinuation)
      return NestedNameSpecifierContinuation <
             Other.NestedNameSpecifierContinuation;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak < Other.ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
    return false;
  }
};
| 225 | |
| 226 | /// \brief The current state when indenting a unwrapped line. |
| 227 | /// |
| 228 | /// As the indenting tries different combinations this is copied by value. |
| 229 | struct LineState { |
| 230 | /// \brief The number of used columns in the current line. |
| 231 | unsigned Column; |
| 232 | |
| 233 | /// \brief The token that needs to be next formatted. |
| 234 | const FormatToken *NextToken; |
| 235 | |
| 236 | /// \brief \c true if this line contains a continued for-loop section. |
| 237 | bool LineContainsContinuedForLoopSection; |
| 238 | |
| 239 | /// \brief The level of nesting inside (), [], <> and {}. |
| 240 | unsigned ParenLevel; |
| 241 | |
| 242 | /// \brief The \c ParenLevel at the start of this line. |
| 243 | unsigned StartOfLineLevel; |
| 244 | |
| 245 | /// \brief The lowest \c ParenLevel on the current line. |
| 246 | unsigned LowestLevelOnLine; |
| 247 | |
| 248 | /// \brief The start column of the string literal, if we're in a string |
| 249 | /// literal sequence, 0 otherwise. |
| 250 | unsigned StartOfStringLiteral; |
| 251 | |
| 252 | /// \brief A stack keeping track of properties applying to parenthesis |
| 253 | /// levels. |
| 254 | std::vector<ParenState> Stack; |
| 255 | |
| 256 | /// \brief Ignore the stack of \c ParenStates for state comparison. |
| 257 | /// |
| 258 | /// In long and deeply nested unwrapped lines, the current algorithm can |
| 259 | /// be insufficient for finding the best formatting with a reasonable amount |
| 260 | /// of time and memory. Setting this flag will effectively lead to the |
| 261 | /// algorithm not analyzing some combinations. However, these combinations |
| 262 | /// rarely contain the optimal solution: In short, accepting a higher |
| 263 | /// penalty early would need to lead to different values in the \c |
| 264 | /// ParenState stack (in an otherwise identical state) and these different |
| 265 | /// values would need to lead to a significant amount of avoided penalty |
| 266 | /// later. |
| 267 | /// |
| 268 | /// FIXME: Come up with a better algorithm instead. |
| 269 | bool IgnoreStackForComparison; |
| 270 | |
Daniel Jasper | 567dcf9 | 2013-09-05 09:29:45 +0000 | [diff] [blame] | 271 | /// \brief The indent of the first token. |
| 272 | unsigned FirstIndent; |
| 273 | |
| 274 | /// \brief The line that is being formatted. |
| 275 | /// |
| 276 | /// Does not need to be considered for memoization because it doesn't change. |
| 277 | const AnnotatedLine *Line; |
| 278 | |
Daniel Jasper | 6b2afe4 | 2013-08-16 11:20:30 +0000 | [diff] [blame] | 279 | /// \brief Comparison operator to be able to used \c LineState in \c map. |
| 280 | bool operator<(const LineState &Other) const { |
| 281 | if (NextToken != Other.NextToken) |
| 282 | return NextToken < Other.NextToken; |
| 283 | if (Column != Other.Column) |
| 284 | return Column < Other.Column; |
| 285 | if (LineContainsContinuedForLoopSection != |
| 286 | Other.LineContainsContinuedForLoopSection) |
| 287 | return LineContainsContinuedForLoopSection; |
| 288 | if (ParenLevel != Other.ParenLevel) |
| 289 | return ParenLevel < Other.ParenLevel; |
| 290 | if (StartOfLineLevel != Other.StartOfLineLevel) |
| 291 | return StartOfLineLevel < Other.StartOfLineLevel; |
| 292 | if (LowestLevelOnLine != Other.LowestLevelOnLine) |
| 293 | return LowestLevelOnLine < Other.LowestLevelOnLine; |
| 294 | if (StartOfStringLiteral != Other.StartOfStringLiteral) |
| 295 | return StartOfStringLiteral < Other.StartOfStringLiteral; |
| 296 | if (IgnoreStackForComparison || Other.IgnoreStackForComparison) |
| 297 | return false; |
| 298 | return Stack < Other.Stack; |
| 299 | } |
| 300 | }; |
| 301 | |
| 302 | } // end namespace format |
| 303 | } // end namespace clang |
| 304 | |
| 305 | #endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H |