Stephen Hines | 0e2c34f | 2015-03-23 12:09:02 -0700 | [diff] [blame] | 1 | //===--- UnwrappedLineFormatter.h - Format C++ code -------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// \file |
| 11 | /// \brief Implements a combinartorial exploration of all the different |
| 12 | /// linebreaks unwrapped lines can be formatted in. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H |
| 17 | #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H |
| 18 | |
| 19 | #include "ContinuationIndenter.h" |
| 20 | #include "clang/Format/Format.h" |
| 21 | #include <map> |
| 22 | #include <queue> |
| 23 | #include <string> |
| 24 | |
| 25 | namespace clang { |
| 26 | namespace format { |
| 27 | |
| 28 | class ContinuationIndenter; |
| 29 | class WhitespaceManager; |
| 30 | |
| 31 | class UnwrappedLineFormatter { |
| 32 | public: |
| 33 | UnwrappedLineFormatter(ContinuationIndenter *Indenter, |
| 34 | WhitespaceManager *Whitespaces, |
| 35 | const FormatStyle &Style, |
| 36 | const AdditionalKeywords &Keywords) |
| 37 | : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), |
| 38 | Keywords(Keywords) {} |
| 39 | |
| 40 | unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, |
| 41 | int AdditionalIndent = 0, bool FixBadIndentation = false); |
| 42 | |
| 43 | private: |
| 44 | /// \brief Formats an \c AnnotatedLine and returns the penalty. |
| 45 | /// |
| 46 | /// If \p DryRun is \c false, directly applies the changes. |
| 47 | unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, |
| 48 | bool DryRun); |
| 49 | |
| 50 | /// \brief An edge in the solution space from \c Previous->State to \c State, |
| 51 | /// inserting a newline dependent on the \c NewLine. |
| 52 | struct StateNode { |
| 53 | StateNode(const LineState &State, bool NewLine, StateNode *Previous) |
| 54 | : State(State), NewLine(NewLine), Previous(Previous) {} |
| 55 | LineState State; |
| 56 | bool NewLine; |
| 57 | StateNode *Previous; |
| 58 | }; |
| 59 | |
| 60 | /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. |
| 61 | /// |
| 62 | /// In case of equal penalties, we want to prefer states that were inserted |
| 63 | /// first. During state generation we make sure that we insert states first |
| 64 | /// that break the line as late as possible. |
| 65 | typedef std::pair<unsigned, unsigned> OrderedPenalty; |
| 66 | |
| 67 | /// \brief An item in the prioritized BFS search queue. The \c StateNode's |
| 68 | /// \c State has the given \c OrderedPenalty. |
| 69 | typedef std::pair<OrderedPenalty, StateNode *> QueueItem; |
| 70 | |
| 71 | /// \brief The BFS queue type. |
| 72 | typedef std::priority_queue<QueueItem, std::vector<QueueItem>, |
| 73 | std::greater<QueueItem> > QueueType; |
| 74 | |
| 75 | /// \brief Get the offset of the line relatively to the level. |
| 76 | /// |
| 77 | /// For example, 'public:' labels in classes are offset by 1 or 2 |
| 78 | /// characters to the left from their level. |
| 79 | int getIndentOffset(const FormatToken &RootToken) { |
| 80 | if (Style.Language == FormatStyle::LK_Java || |
| 81 | Style.Language == FormatStyle::LK_JavaScript) |
| 82 | return 0; |
| 83 | if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) |
| 84 | return Style.AccessModifierOffset; |
| 85 | return 0; |
| 86 | } |
| 87 | |
| 88 | /// \brief Add a new line and the required indent before the first Token |
| 89 | /// of the \c UnwrappedLine if there was no structural parsing error. |
| 90 | void formatFirstToken(FormatToken &RootToken, |
| 91 | const AnnotatedLine *PreviousLine, unsigned IndentLevel, |
| 92 | unsigned Indent, bool InPPDirective); |
| 93 | |
| 94 | /// \brief Get the indent of \p Level from \p IndentForLevel. |
| 95 | /// |
| 96 | /// \p IndentForLevel must contain the indent for the level \c l |
| 97 | /// at \p IndentForLevel[l], or a value < 0 if the indent for |
| 98 | /// that level is unknown. |
| 99 | unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level); |
| 100 | |
| 101 | void join(AnnotatedLine &A, const AnnotatedLine &B); |
| 102 | |
| 103 | unsigned getColumnLimit(bool InPPDirective) const { |
| 104 | // In preprocessor directives reserve two chars for trailing " \" |
| 105 | return Style.ColumnLimit - (InPPDirective ? 2 : 0); |
| 106 | } |
| 107 | |
| 108 | struct CompareLineStatePointers { |
| 109 | bool operator()(LineState *obj1, LineState *obj2) const { |
| 110 | return *obj1 < *obj2; |
| 111 | } |
| 112 | }; |
| 113 | |
| 114 | /// \brief Analyze the entire solution space starting from \p InitialState. |
| 115 | /// |
| 116 | /// This implements a variant of Dijkstra's algorithm on the graph that spans |
| 117 | /// the solution space (\c LineStates are the nodes). The algorithm tries to |
| 118 | /// find the shortest path (the one with lowest penalty) from \p InitialState |
| 119 | /// to a state where all tokens are placed. Returns the penalty. |
| 120 | /// |
| 121 | /// If \p DryRun is \c false, directly applies the changes. |
| 122 | unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false); |
| 123 | |
| 124 | void reconstructPath(LineState &State, StateNode *Current); |
| 125 | |
| 126 | /// \brief Add the following state to the analysis queue \c Queue. |
| 127 | /// |
| 128 | /// Assume the current state is \p PreviousNode and has been reached with a |
| 129 | /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. |
| 130 | void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, |
| 131 | bool NewLine, unsigned *Count, QueueType *Queue); |
| 132 | |
| 133 | /// \brief If the \p State's next token is an r_brace closing a nested block, |
| 134 | /// format the nested block before it. |
| 135 | /// |
| 136 | /// Returns \c true if all children could be placed successfully and adapts |
| 137 | /// \p Penalty as well as \p State. If \p DryRun is false, also directly |
| 138 | /// creates changes using \c Whitespaces. |
| 139 | /// |
| 140 | /// The crucial idea here is that children always get formatted upon |
| 141 | /// encountering the closing brace right after the nested block. Now, if we |
| 142 | /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is |
| 143 | /// \c false), the entire block has to be kept on the same line (which is only |
| 144 | /// possible if it fits on the line, only contains a single statement, etc. |
| 145 | /// |
| 146 | /// If \p NewLine is true, we format the nested block on separate lines, i.e. |
| 147 | /// break after the "{", format all lines with correct indentation and the put |
| 148 | /// the closing "}" on yet another new line. |
| 149 | /// |
| 150 | /// This enables us to keep the simple structure of the |
| 151 | /// \c UnwrappedLineFormatter, where we only have two options for each token: |
| 152 | /// break or don't break. |
| 153 | bool formatChildren(LineState &State, bool NewLine, bool DryRun, |
| 154 | unsigned &Penalty); |
| 155 | |
| 156 | ContinuationIndenter *Indenter; |
| 157 | WhitespaceManager *Whitespaces; |
| 158 | FormatStyle Style; |
| 159 | const AdditionalKeywords &Keywords; |
| 160 | |
| 161 | llvm::SpecificBumpPtrAllocator<StateNode> Allocator; |
| 162 | |
| 163 | // Cache to store the penalty of formatting a vector of AnnotatedLines |
| 164 | // starting from a specific additional offset. Improves performance if there |
| 165 | // are many nested blocks. |
| 166 | std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, |
| 167 | unsigned> PenaltyCache; |
| 168 | }; |
| 169 | } // end namespace format |
| 170 | } // end namespace clang |
| 171 | |
| 172 | #endif // LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H |