Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 1 | //===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===---------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// Some operations such as code completion produce a set of candidates. |
| 11 | /// Usually the user can choose between them, but we should put the best options |
| 12 | /// at the top (they're easier to select, and more likely to be seen). |
| 13 | /// |
| 14 | /// This file defines building blocks for ranking candidates. |
| 15 | /// It's used by the features directly and also in the implementation of |
| 16 | /// indexes, as indexes also need to heuristically limit their results. |
| 17 | /// |
| 18 | /// The facilities here are: |
| 19 | /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString |
| 20 | /// These are structured in a way that they can be debugged, and are fairly |
| 21 | /// consistent regardless of the source. |
| 22 | /// - compute scores from scoring signals. These are suitable for sorting. |
| 23 | /// - sorting utilities like the TopN container. |
| 24 | /// These could be split up further to isolate dependencies if we care. |
| 25 | /// |
| 26 | //===---------------------------------------------------------------------===// |
| 27 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H |
| 28 | #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H |
Eric Liu | 5d2a807 | 2018-07-23 10:56:37 +0000 | [diff] [blame] | 29 | #include "clang/Sema/CodeCompleteConsumer.h" |
Eric Liu | 09c3c37 | 2018-06-15 08:58:12 +0000 | [diff] [blame] | 30 | #include "llvm/ADT/ArrayRef.h" |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 31 | #include "llvm/ADT/StringRef.h" |
| 32 | #include <algorithm> |
| 33 | #include <functional> |
| 34 | #include <vector> |
| 35 | namespace llvm { |
| 36 | class raw_ostream; |
| 37 | } |
| 38 | namespace clang { |
| 39 | class CodeCompletionResult; |
| 40 | namespace clangd { |
| 41 | struct Symbol; |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 42 | class URIDistance; |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 43 | |
| 44 | // Signals structs are designed to be aggregated from 0 or more sources. |
| 45 | // A default instance has neutral signals, and sources are merged into it. |
| 46 | // They can be dumped for debugging, and evaluate()d into a score. |
| 47 | |
| 48 | /// Attributes of a symbol that affect how much we like it. |
| 49 | struct SymbolQualitySignals { |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 50 | bool Deprecated = false; |
Sam McCall | e018b36 | 2018-06-08 09:36:34 +0000 | [diff] [blame] | 51 | bool ReservedName = false; // __foo, _Foo are usually implementation details. |
| 52 | // FIXME: make these findable once user types _. |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 53 | unsigned References = 0; |
| 54 | |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 55 | enum SymbolCategory { |
Sam McCall | c3b5bad | 2018-06-14 13:42:21 +0000 | [diff] [blame] | 56 | Unknown = 0, |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 57 | Variable, |
| 58 | Macro, |
| 59 | Type, |
| 60 | Function, |
Eric Liu | d7de811 | 2018-07-24 08:51:52 +0000 | [diff] [blame] | 61 | Constructor, |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 62 | Namespace, |
Sam McCall | c3b5bad | 2018-06-14 13:42:21 +0000 | [diff] [blame] | 63 | Keyword, |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 64 | } Category = Unknown; |
| 65 | |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 66 | void merge(const CodeCompletionResult &SemaCCResult); |
| 67 | void merge(const Symbol &IndexResult); |
| 68 | |
| 69 | // Condense these signals down to a single number, higher is better. |
| 70 | float evaluate() const; |
| 71 | }; |
| 72 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 73 | const SymbolQualitySignals &); |
| 74 | |
| 75 | /// Attributes of a symbol-query pair that affect how much we like it. |
| 76 | struct SymbolRelevanceSignals { |
Sam McCall | bc7cbb7 | 2018-06-06 12:38:37 +0000 | [diff] [blame] | 77 | /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 78 | float NameMatch = 1; |
| 79 | bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). |
Eric Liu | 09c3c37 | 2018-06-15 08:58:12 +0000 | [diff] [blame] | 80 | |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 81 | URIDistance *FileProximityMatch = nullptr; |
Eric Liu | 09c3c37 | 2018-06-15 08:58:12 +0000 | [diff] [blame] | 82 | /// This is used to calculate proximity between the index symbol and the |
| 83 | /// query. |
| 84 | llvm::StringRef SymbolURI; |
Sam McCall | db41e1c | 2018-06-05 12:22:43 +0000 | [diff] [blame] | 85 | /// Proximity between best declaration and the query. [0-1], 1 is closest. |
Eric Liu | 09c3c37 | 2018-06-15 08:58:12 +0000 | [diff] [blame] | 86 | /// FIXME: unify with index proximity score - signals should be |
| 87 | /// source-independent. |
| 88 | float SemaProximityScore = 0; |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 89 | |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 90 | // An approximate measure of where we expect the symbol to be used. |
| 91 | enum AccessibleScope { |
| 92 | FunctionScope, |
| 93 | ClassScope, |
| 94 | FileScope, |
| 95 | GlobalScope, |
| 96 | } Scope = GlobalScope; |
| 97 | |
| 98 | enum QueryType { |
| 99 | CodeComplete, |
| 100 | Generic, |
| 101 | } Query = Generic; |
| 102 | |
Eric Liu | 5d2a807 | 2018-07-23 10:56:37 +0000 | [diff] [blame] | 103 | CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other; |
| 104 | |
| 105 | // Whether symbol is an instance member of a class. |
| 106 | bool IsInstanceMember = false; |
| 107 | |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 108 | void merge(const CodeCompletionResult &SemaResult); |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 109 | void merge(const Symbol &IndexResult); |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 110 | |
| 111 | // Condense these signals down to a single number, higher is better. |
| 112 | float evaluate() const; |
| 113 | }; |
| 114 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 115 | const SymbolRelevanceSignals &); |
| 116 | |
| 117 | /// Combine symbol quality and relevance into a single score. |
| 118 | float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); |
| 119 | |
| 120 | /// TopN<T> is a lossy container that preserves only the "best" N elements. |
| 121 | template <typename T, typename Compare = std::greater<T>> class TopN { |
| 122 | public: |
| 123 | using value_type = T; |
| 124 | TopN(size_t N, Compare Greater = Compare()) |
| 125 | : N(N), Greater(std::move(Greater)) {} |
| 126 | |
| 127 | // Adds a candidate to the set. |
| 128 | // Returns true if a candidate was dropped to get back under N. |
| 129 | bool push(value_type &&V) { |
| 130 | bool Dropped = false; |
| 131 | if (Heap.size() >= N) { |
| 132 | Dropped = true; |
| 133 | if (N > 0 && Greater(V, Heap.front())) { |
| 134 | std::pop_heap(Heap.begin(), Heap.end(), Greater); |
| 135 | Heap.back() = std::move(V); |
| 136 | std::push_heap(Heap.begin(), Heap.end(), Greater); |
| 137 | } |
| 138 | } else { |
| 139 | Heap.push_back(std::move(V)); |
| 140 | std::push_heap(Heap.begin(), Heap.end(), Greater); |
| 141 | } |
| 142 | assert(Heap.size() <= N); |
| 143 | assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); |
| 144 | return Dropped; |
| 145 | } |
| 146 | |
| 147 | // Returns candidates from best to worst. |
| 148 | std::vector<value_type> items() && { |
| 149 | std::sort_heap(Heap.begin(), Heap.end(), Greater); |
| 150 | assert(Heap.size() <= N); |
| 151 | return std::move(Heap); |
| 152 | } |
| 153 | |
| 154 | private: |
| 155 | const size_t N; |
| 156 | std::vector<value_type> Heap; // Min-heap, comparator is Greater. |
| 157 | Compare Greater; |
| 158 | }; |
| 159 | |
Ilya Biryukov | 8573def | 2018-05-30 12:41:19 +0000 | [diff] [blame] | 160 | /// Returns a string that sorts in the same order as (-Score, Tiebreak), for |
| 161 | /// LSP. (The highest score compares smallest so it sorts at the top). |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 162 | std::string sortText(float Score, llvm::StringRef Tiebreak = ""); |
| 163 | |
| 164 | } // namespace clangd |
| 165 | } // namespace clang |
| 166 | |
| 167 | #endif |