Daniel Jasper | 6fe2f00 | 2013-04-25 08:56:26 +0000 | [diff] [blame] | 1 | //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// \file |
| 11 | /// \brief WhitespaceManager class manages whitespace around tokens and their |
| 12 | /// replacements. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
Benjamin Kramer | 2f5db8b | 2014-08-13 16:25:19 +0000 | [diff] [blame] | 16 | #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H |
| 17 | #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 18 | |
| 19 | #include "TokenAnnotator.h" |
| 20 | #include "clang/Basic/SourceManager.h" |
| 21 | #include "clang/Format/Format.h" |
| 22 | #include <string> |
| 23 | |
| 24 | namespace clang { |
| 25 | namespace format { |
| 26 | |
| 27 | /// \brief Manages the whitespaces around tokens and their replacements. |
| 28 | /// |
| 29 | /// This includes special handling for certain constructs, e.g. the alignment of |
| 30 | /// trailing line comments. |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 31 | /// |
| 32 | /// To guarantee correctness of alignment operations, the \c WhitespaceManager |
| 33 | /// must be informed about every token in the source file; for each token, there |
| 34 | /// must be exactly one call to either \c replaceWhitespace or |
| 35 | /// \c addUntouchableToken. |
| 36 | /// |
| 37 | /// There may be multiple calls to \c breakToken for a given token. |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 38 | class WhitespaceManager { |
| 39 | public: |
Eric Liu | 635423e | 2016-04-28 07:52:03 +0000 | [diff] [blame] | 40 | WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, |
Alexander Kornienko | 9e649af | 2013-09-11 12:25:57 +0000 | [diff] [blame] | 41 | bool UseCRLF) |
| 42 | : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 43 | |
| 44 | /// \brief Replaces the whitespace in front of \p Tok. Only call once for |
| 45 | /// each \c AnnotatedToken. |
Krasimir Georgiev | 54ef4ba | 2017-06-13 14:58:55 +0000 | [diff] [blame] | 46 | /// |
| 47 | /// \p StartOfTokenColumn is the column at which the token will start after |
| 48 | /// this replacement. It is needed for determining how \p Spaces is turned |
| 49 | /// into tabs and spaces for some format styles. |
Daniel Jasper | 7d42f3f | 2017-01-31 11:25:01 +0000 | [diff] [blame] | 50 | void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, |
Alexander Kornienko | 3c3d09c | 2013-09-27 16:14:22 +0000 | [diff] [blame] | 51 | unsigned StartOfTokenColumn, |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 52 | bool InPPDirective = false); |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 53 | |
Alp Toker | f6a24ce | 2013-12-05 16:25:25 +0000 | [diff] [blame] | 54 | /// \brief Adds information about an unchangeable token's whitespace. |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 55 | /// |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 56 | /// Needs to be called for every token for which \c replaceWhitespace |
| 57 | /// was not called. |
| 58 | void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 59 | |
Alexander Kornienko | 555efc3 | 2013-06-11 16:01:49 +0000 | [diff] [blame] | 60 | /// \brief Inserts or replaces whitespace in the middle of a token. |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 61 | /// |
Alexander Kornienko | 555efc3 | 2013-06-11 16:01:49 +0000 | [diff] [blame] | 62 | /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix |
| 63 | /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars |
| 64 | /// characters. |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 65 | /// |
Alexander Kornienko | 67d9c8c | 2014-04-17 16:12:46 +0000 | [diff] [blame] | 66 | /// Note: \p Spaces can be negative to retain information about initial |
| 67 | /// relative column offset between a line of a block comment and the start of |
| 68 | /// the comment. This negative offset may be compensated by trailing comment |
| 69 | /// alignment here. In all other cases negative \p Spaces will be truncated to |
| 70 | /// 0. |
| 71 | /// |
Alexander Kornienko | 555efc3 | 2013-06-11 16:01:49 +0000 | [diff] [blame] | 72 | /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is |
| 73 | /// used to align backslashes correctly. |
| 74 | void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, |
| 75 | unsigned ReplaceChars, |
| 76 | StringRef PreviousPostfix, |
| 77 | StringRef CurrentPrefix, bool InPPDirective, |
Daniel Jasper | 7d42f3f | 2017-01-31 11:25:01 +0000 | [diff] [blame] | 78 | unsigned Newlines, int Spaces); |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 79 | |
| 80 | /// \brief Returns all the \c Replacements created during formatting. |
| 81 | const tooling::Replacements &generateReplacements(); |
| 82 | |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 83 | /// \brief Represents a change before a token, a break inside a token, |
| 84 | /// or the layout of an unchanged token (or whitespace within). |
| 85 | struct Change { |
| 86 | /// \brief Functor to sort changes in original source order. |
| 87 | class IsBeforeInFile { |
Daniel Jasper | 3ac9b9e | 2013-07-08 14:34:09 +0000 | [diff] [blame] | 88 | public: |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 89 | IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} |
| 90 | bool operator()(const Change &C1, const Change &C2) const; |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 91 | |
Daniel Jasper | 3ac9b9e | 2013-07-08 14:34:09 +0000 | [diff] [blame] | 92 | private: |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 93 | const SourceManager &SourceMgr; |
| 94 | }; |
| 95 | |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 96 | /// \brief Creates a \c Change. |
| 97 | /// |
| 98 | /// The generated \c Change will replace the characters at |
| 99 | /// \p OriginalWhitespaceRange with a concatenation of |
| 100 | /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces |
| 101 | /// and \p CurrentLinePrefix. |
| 102 | /// |
| 103 | /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out |
| 104 | /// trailing comments and escaped newlines. |
Daniel Jasper | 7d42f3f | 2017-01-31 11:25:01 +0000 | [diff] [blame] | 105 | Change(const FormatToken &Tok, bool CreateReplacement, |
| 106 | SourceRange OriginalWhitespaceRange, int Spaces, |
| 107 | unsigned StartOfTokenColumn, unsigned NewlinesBefore, |
| 108 | StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, |
| 109 | bool ContinuesPPDirective, bool IsInsideToken); |
| 110 | |
| 111 | // The kind of the token whose whitespace this change replaces, or in which |
| 112 | // this change inserts whitespace. |
| 113 | // FIXME: Currently this is not set correctly for breaks inside comments, as |
| 114 | // the \c BreakableToken is still doing its own alignment. |
| 115 | const FormatToken *Tok; |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 116 | |
| 117 | bool CreateReplacement; |
| 118 | // Changes might be in the middle of a token, so we cannot just keep the |
| 119 | // FormatToken around to query its information. |
| 120 | SourceRange OriginalWhitespaceRange; |
| 121 | unsigned StartOfTokenColumn; |
| 122 | unsigned NewlinesBefore; |
| 123 | std::string PreviousLinePostfix; |
| 124 | std::string CurrentLinePrefix; |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 125 | bool ContinuesPPDirective; |
Alexander Kornienko | 3c3d09c | 2013-09-27 16:14:22 +0000 | [diff] [blame] | 126 | |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 127 | // The number of spaces in front of the token or broken part of the token. |
| 128 | // This will be adapted when aligning tokens. |
Alexander Kornienko | 67d9c8c | 2014-04-17 16:12:46 +0000 | [diff] [blame] | 129 | // Can be negative to retain information about the initial relative offset |
| 130 | // of the lines in a block comment. This is used when aligning trailing |
| 131 | // comments. Uncompensated negative offset is truncated to 0. |
| 132 | int Spaces; |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 133 | |
Benjamin Kramer | dab5046 | 2016-01-11 16:27:16 +0000 | [diff] [blame] | 134 | // If this change is inside of a token but not at the start of the token or |
| 135 | // directly after a newline. |
| 136 | bool IsInsideToken; |
| 137 | |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 138 | // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and |
| 139 | // \c EscapedNewlineColumn will be calculated in |
| 140 | // \c calculateLineBreakInformation. |
| 141 | bool IsTrailingComment; |
| 142 | unsigned TokenLength; |
| 143 | unsigned PreviousEndOfTokenColumn; |
| 144 | unsigned EscapedNewlineColumn; |
Alexander Kornienko | 67d9c8c | 2014-04-17 16:12:46 +0000 | [diff] [blame] | 145 | |
| 146 | // These fields are used to retain correct relative line indentation in a |
| 147 | // block comment when aligning trailing comments. |
| 148 | // |
| 149 | // If this Change represents a continuation of a block comment, |
| 150 | // \c StartOfBlockComment is pointer to the first Change in the block |
| 151 | // comment. \c IndentationOffset is a relative column offset to this |
| 152 | // change, so that the correct column can be reconstructed at the end of |
| 153 | // the alignment process. |
| 154 | const Change *StartOfBlockComment; |
| 155 | int IndentationOffset; |
Nikola Smiljanic | 92b397f | 2017-03-23 02:51:25 +0000 | [diff] [blame] | 156 | |
| 157 | // A combination of nesting level and indent level, which are used in |
| 158 | // tandem to compute lexical scope, for the purposes of deciding |
| 159 | // when to stop consecutive alignment runs. |
| 160 | std::pair<unsigned, unsigned> |
| 161 | nestingAndIndentLevel() const { |
| 162 | return std::make_pair(Tok->NestingLevel, Tok->IndentLevel); |
| 163 | } |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 164 | }; |
| 165 | |
Daniel Jasper | ec90e51 | 2015-12-01 12:00:43 +0000 | [diff] [blame] | 166 | private: |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 167 | /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens |
| 168 | /// or token parts in a line and \c PreviousEndOfTokenColumn and |
| 169 | /// \c EscapedNewlineColumn for the first tokens or token parts in a line. |
| 170 | void calculateLineBreakInformation(); |
| 171 | |
Daniel Jasper | a4499133 | 2015-04-29 13:06:49 +0000 | [diff] [blame] | 172 | /// \brief Align consecutive assignments over all \c Changes. |
| 173 | void alignConsecutiveAssignments(); |
| 174 | |
Daniel Jasper | e12597c | 2015-10-01 10:06:54 +0000 | [diff] [blame] | 175 | /// \brief Align consecutive declarations over all \c Changes. |
| 176 | void alignConsecutiveDeclarations(); |
| 177 | |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 178 | /// \brief Align trailing comments over all \c Changes. |
| 179 | void alignTrailingComments(); |
| 180 | |
| 181 | /// \brief Align trailing comments from change \p Start to change \p End at |
| 182 | /// the specified \p Column. |
| 183 | void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); |
| 184 | |
| 185 | /// \brief Align escaped newlines over all \c Changes. |
Daniel Jasper | 6fe2f00 | 2013-04-25 08:56:26 +0000 | [diff] [blame] | 186 | void alignEscapedNewlines(); |
Daniel Jasper | 770eb7c | 2013-04-24 06:33:59 +0000 | [diff] [blame] | 187 | |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 188 | /// \brief Align escaped newlines from change \p Start to change \p End at |
| 189 | /// the specified \p Column. |
| 190 | void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); |
| 191 | |
| 192 | /// \brief Fill \c Replaces with the replacements for all effective changes. |
| 193 | void generateChanges(); |
| 194 | |
| 195 | /// \brief Stores \p Text as the replacement for the whitespace in \p Range. |
Craig Topper | e335f25 | 2015-10-04 04:53:55 +0000 | [diff] [blame] | 196 | void storeReplacement(SourceRange Range, StringRef Text); |
Alexander Kornienko | 9e649af | 2013-09-11 12:25:57 +0000 | [diff] [blame] | 197 | void appendNewlineText(std::string &Text, unsigned Newlines); |
Jacob Bandes-Storch | d6a7e98 | 2017-08-10 00:15:31 +0000 | [diff] [blame^] | 198 | void appendEscapedNewlineText(std::string &Text, unsigned Newlines, |
| 199 | unsigned PreviousEndOfTokenColumn, |
| 200 | unsigned EscapedNewlineColumn); |
Alexander Kornienko | 3c3d09c | 2013-09-27 16:14:22 +0000 | [diff] [blame] | 201 | void appendIndentText(std::string &Text, unsigned IndentLevel, |
| 202 | unsigned Spaces, unsigned WhitespaceStartColumn); |
Manuel Klimek | b9eae4c | 2013-05-13 09:22:11 +0000 | [diff] [blame] | 203 | |
Manuel Klimek | 4fe4300 | 2013-05-22 12:51:29 +0000 | [diff] [blame] | 204 | SmallVector<Change, 16> Changes; |
Eric Liu | 635423e | 2016-04-28 07:52:03 +0000 | [diff] [blame] | 205 | const SourceManager &SourceMgr; |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 206 | tooling::Replacements Replaces; |
| 207 | const FormatStyle &Style; |
Alexander Kornienko | 9e649af | 2013-09-11 12:25:57 +0000 | [diff] [blame] | 208 | bool UseCRLF; |
Alexander Kornienko | cb45bc1 | 2013-04-15 14:28:00 +0000 | [diff] [blame] | 209 | }; |
| 210 | |
| 211 | } // namespace format |
| 212 | } // namespace clang |
| 213 | |
Benjamin Kramer | 2f5db8b | 2014-08-13 16:25:19 +0000 | [diff] [blame] | 214 | #endif |