Daniel Jasper | 0df5093 | 2014-12-10 19:00:42 +0000 | [diff] [blame] | 1 | //===--- UnwrappedLineFormatter.h - Format C++ code -------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// \file |
| 11 | /// \brief Implements a combinartorial exploration of all the different |
| 12 | /// linebreaks unwrapped lines can be formatted in. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H |
| 17 | #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H |
| 18 | |
| 19 | #include "ContinuationIndenter.h" |
| 20 | #include "clang/Format/Format.h" |
| 21 | #include <map> |
| 22 | #include <queue> |
| 23 | #include <string> |
| 24 | |
| 25 | namespace clang { |
| 26 | namespace format { |
| 27 | |
| 28 | class ContinuationIndenter; |
| 29 | class WhitespaceManager; |
| 30 | |
| 31 | class UnwrappedLineFormatter { |
| 32 | public: |
| 33 | UnwrappedLineFormatter(ContinuationIndenter *Indenter, |
| 34 | WhitespaceManager *Whitespaces, |
Nico Weber | fac2371 | 2015-02-04 15:26:27 +0000 | [diff] [blame] | 35 | const FormatStyle &Style, |
| 36 | const AdditionalKeywords &Keywords) |
| 37 | : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), |
| 38 | Keywords(Keywords) {} |
Daniel Jasper | 0df5093 | 2014-12-10 19:00:42 +0000 | [diff] [blame] | 39 | |
| 40 | unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, |
| 41 | int AdditionalIndent = 0, bool FixBadIndentation = false); |
| 42 | |
| 43 | private: |
| 44 | /// \brief Formats an \c AnnotatedLine and returns the penalty. |
| 45 | /// |
| 46 | /// If \p DryRun is \c false, directly applies the changes. |
| 47 | unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, |
| 48 | bool DryRun); |
| 49 | |
| 50 | /// \brief An edge in the solution space from \c Previous->State to \c State, |
| 51 | /// inserting a newline dependent on the \c NewLine. |
| 52 | struct StateNode { |
| 53 | StateNode(const LineState &State, bool NewLine, StateNode *Previous) |
| 54 | : State(State), NewLine(NewLine), Previous(Previous) {} |
| 55 | LineState State; |
| 56 | bool NewLine; |
| 57 | StateNode *Previous; |
| 58 | }; |
| 59 | |
| 60 | /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. |
| 61 | /// |
| 62 | /// In case of equal penalties, we want to prefer states that were inserted |
| 63 | /// first. During state generation we make sure that we insert states first |
| 64 | /// that break the line as late as possible. |
| 65 | typedef std::pair<unsigned, unsigned> OrderedPenalty; |
| 66 | |
| 67 | /// \brief An item in the prioritized BFS search queue. The \c StateNode's |
| 68 | /// \c State has the given \c OrderedPenalty. |
| 69 | typedef std::pair<OrderedPenalty, StateNode *> QueueItem; |
| 70 | |
| 71 | /// \brief The BFS queue type. |
| 72 | typedef std::priority_queue<QueueItem, std::vector<QueueItem>, |
| 73 | std::greater<QueueItem> > QueueType; |
| 74 | |
| 75 | /// \brief Get the offset of the line relatively to the level. |
| 76 | /// |
| 77 | /// For example, 'public:' labels in classes are offset by 1 or 2 |
| 78 | /// characters to the left from their level. |
| 79 | int getIndentOffset(const FormatToken &RootToken) { |
| 80 | if (Style.Language == FormatStyle::LK_Java) |
| 81 | return 0; |
| 82 | if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) |
| 83 | return Style.AccessModifierOffset; |
| 84 | return 0; |
| 85 | } |
| 86 | |
| 87 | /// \brief Add a new line and the required indent before the first Token |
| 88 | /// of the \c UnwrappedLine if there was no structural parsing error. |
| 89 | void formatFirstToken(FormatToken &RootToken, |
| 90 | const AnnotatedLine *PreviousLine, unsigned IndentLevel, |
| 91 | unsigned Indent, bool InPPDirective); |
| 92 | |
| 93 | /// \brief Get the indent of \p Level from \p IndentForLevel. |
| 94 | /// |
| 95 | /// \p IndentForLevel must contain the indent for the level \c l |
| 96 | /// at \p IndentForLevel[l], or a value < 0 if the indent for |
| 97 | /// that level is unknown. |
| 98 | unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level); |
| 99 | |
| 100 | void join(AnnotatedLine &A, const AnnotatedLine &B); |
| 101 | |
| 102 | unsigned getColumnLimit(bool InPPDirective) const { |
| 103 | // In preprocessor directives reserve two chars for trailing " \" |
| 104 | return Style.ColumnLimit - (InPPDirective ? 2 : 0); |
| 105 | } |
| 106 | |
| 107 | struct CompareLineStatePointers { |
| 108 | bool operator()(LineState *obj1, LineState *obj2) const { |
| 109 | return *obj1 < *obj2; |
| 110 | } |
| 111 | }; |
| 112 | |
| 113 | /// \brief Analyze the entire solution space starting from \p InitialState. |
| 114 | /// |
| 115 | /// This implements a variant of Dijkstra's algorithm on the graph that spans |
| 116 | /// the solution space (\c LineStates are the nodes). The algorithm tries to |
| 117 | /// find the shortest path (the one with lowest penalty) from \p InitialState |
| 118 | /// to a state where all tokens are placed. Returns the penalty. |
| 119 | /// |
| 120 | /// If \p DryRun is \c false, directly applies the changes. |
| 121 | unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false); |
| 122 | |
| 123 | void reconstructPath(LineState &State, StateNode *Current); |
| 124 | |
| 125 | /// \brief Add the following state to the analysis queue \c Queue. |
| 126 | /// |
| 127 | /// Assume the current state is \p PreviousNode and has been reached with a |
| 128 | /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. |
| 129 | void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, |
| 130 | bool NewLine, unsigned *Count, QueueType *Queue); |
| 131 | |
| 132 | /// \brief If the \p State's next token is an r_brace closing a nested block, |
| 133 | /// format the nested block before it. |
| 134 | /// |
| 135 | /// Returns \c true if all children could be placed successfully and adapts |
| 136 | /// \p Penalty as well as \p State. If \p DryRun is false, also directly |
| 137 | /// creates changes using \c Whitespaces. |
| 138 | /// |
| 139 | /// The crucial idea here is that children always get formatted upon |
| 140 | /// encountering the closing brace right after the nested block. Now, if we |
| 141 | /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is |
| 142 | /// \c false), the entire block has to be kept on the same line (which is only |
| 143 | /// possible if it fits on the line, only contains a single statement, etc. |
| 144 | /// |
| 145 | /// If \p NewLine is true, we format the nested block on separate lines, i.e. |
| 146 | /// break after the "{", format all lines with correct indentation and the put |
| 147 | /// the closing "}" on yet another new line. |
| 148 | /// |
| 149 | /// This enables us to keep the simple structure of the |
| 150 | /// \c UnwrappedLineFormatter, where we only have two options for each token: |
| 151 | /// break or don't break. |
| 152 | bool formatChildren(LineState &State, bool NewLine, bool DryRun, |
| 153 | unsigned &Penalty); |
| 154 | |
| 155 | ContinuationIndenter *Indenter; |
| 156 | WhitespaceManager *Whitespaces; |
| 157 | FormatStyle Style; |
Nico Weber | fac2371 | 2015-02-04 15:26:27 +0000 | [diff] [blame] | 158 | const AdditionalKeywords &Keywords; |
Daniel Jasper | 0df5093 | 2014-12-10 19:00:42 +0000 | [diff] [blame] | 159 | |
| 160 | llvm::SpecificBumpPtrAllocator<StateNode> Allocator; |
| 161 | |
| 162 | // Cache to store the penalty of formatting a vector of AnnotatedLines |
| 163 | // starting from a specific additional offset. Improves performance if there |
| 164 | // are many nested blocks. |
| 165 | std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, |
| 166 | unsigned> PenaltyCache; |
| 167 | }; |
| 168 | } // end namespace format |
| 169 | } // end namespace clang |
| 170 | |
| 171 | #endif // LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H |