Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 1 | //===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===---------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// Some operations such as code completion produce a set of candidates. |
| 11 | /// Usually the user can choose between them, but we should put the best options |
| 12 | /// at the top (they're easier to select, and more likely to be seen). |
| 13 | /// |
| 14 | /// This file defines building blocks for ranking candidates. |
| 15 | /// It's used by the features directly and also in the implementation of |
| 16 | /// indexes, as indexes also need to heuristically limit their results. |
| 17 | /// |
| 18 | /// The facilities here are: |
| 19 | /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString |
| 20 | /// These are structured in a way that they can be debugged, and are fairly |
| 21 | /// consistent regardless of the source. |
| 22 | /// - compute scores from scoring signals. These are suitable for sorting. |
| 23 | /// - sorting utilities like the TopN container. |
| 24 | /// These could be split up further to isolate dependencies if we care. |
| 25 | /// |
| 26 | //===---------------------------------------------------------------------===// |
| 27 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H |
| 28 | #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H |
| 29 | #include "llvm/ADT/StringRef.h" |
| 30 | #include <algorithm> |
| 31 | #include <functional> |
| 32 | #include <vector> |
| 33 | namespace llvm { |
| 34 | class raw_ostream; |
| 35 | } |
| 36 | namespace clang { |
| 37 | class CodeCompletionResult; |
| 38 | namespace clangd { |
| 39 | struct Symbol; |
| 40 | |
| 41 | // Signals structs are designed to be aggregated from 0 or more sources. |
| 42 | // A default instance has neutral signals, and sources are merged into it. |
| 43 | // They can be dumped for debugging, and evaluate()d into a score. |
| 44 | |
| 45 | /// Attributes of a symbol that affect how much we like it. |
| 46 | struct SymbolQualitySignals { |
| 47 | unsigned SemaCCPriority = 0; // 1-80, 1 is best. 0 means absent. |
| 48 | // FIXME: this is actually a mix of symbol |
| 49 | // quality and relevance. Untangle this. |
| 50 | bool Deprecated = false; |
| 51 | unsigned References = 0; |
| 52 | |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 53 | enum SymbolCategory { |
| 54 | Variable, |
| 55 | Macro, |
| 56 | Type, |
| 57 | Function, |
| 58 | Namespace, |
| 59 | Unknown, |
| 60 | } Category = Unknown; |
| 61 | |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 62 | void merge(const CodeCompletionResult &SemaCCResult); |
| 63 | void merge(const Symbol &IndexResult); |
| 64 | |
| 65 | // Condense these signals down to a single number, higher is better. |
| 66 | float evaluate() const; |
| 67 | }; |
| 68 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 69 | const SymbolQualitySignals &); |
| 70 | |
| 71 | /// Attributes of a symbol-query pair that affect how much we like it. |
| 72 | struct SymbolRelevanceSignals { |
Sam McCall | bc7cbb7 | 2018-06-06 12:38:37 +0000 | [diff] [blame^] | 73 | /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 74 | float NameMatch = 1; |
| 75 | bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). |
Sam McCall | db41e1c | 2018-06-05 12:22:43 +0000 | [diff] [blame] | 76 | /// Proximity between best declaration and the query. [0-1], 1 is closest. |
Ilya Biryukov | f029646 | 2018-06-04 14:50:59 +0000 | [diff] [blame] | 77 | float ProximityScore = 0; |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 78 | |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 79 | // An approximate measure of where we expect the symbol to be used. |
| 80 | enum AccessibleScope { |
| 81 | FunctionScope, |
| 82 | ClassScope, |
| 83 | FileScope, |
| 84 | GlobalScope, |
| 85 | } Scope = GlobalScope; |
| 86 | |
| 87 | enum QueryType { |
| 88 | CodeComplete, |
| 89 | Generic, |
| 90 | } Query = Generic; |
| 91 | |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 92 | void merge(const CodeCompletionResult &SemaResult); |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 93 | void merge(const Symbol &IndexResult); |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 94 | |
| 95 | // Condense these signals down to a single number, higher is better. |
| 96 | float evaluate() const; |
| 97 | }; |
| 98 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 99 | const SymbolRelevanceSignals &); |
| 100 | |
| 101 | /// Combine symbol quality and relevance into a single score. |
| 102 | float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); |
| 103 | |
| 104 | /// TopN<T> is a lossy container that preserves only the "best" N elements. |
| 105 | template <typename T, typename Compare = std::greater<T>> class TopN { |
| 106 | public: |
| 107 | using value_type = T; |
| 108 | TopN(size_t N, Compare Greater = Compare()) |
| 109 | : N(N), Greater(std::move(Greater)) {} |
| 110 | |
| 111 | // Adds a candidate to the set. |
| 112 | // Returns true if a candidate was dropped to get back under N. |
| 113 | bool push(value_type &&V) { |
| 114 | bool Dropped = false; |
| 115 | if (Heap.size() >= N) { |
| 116 | Dropped = true; |
| 117 | if (N > 0 && Greater(V, Heap.front())) { |
| 118 | std::pop_heap(Heap.begin(), Heap.end(), Greater); |
| 119 | Heap.back() = std::move(V); |
| 120 | std::push_heap(Heap.begin(), Heap.end(), Greater); |
| 121 | } |
| 122 | } else { |
| 123 | Heap.push_back(std::move(V)); |
| 124 | std::push_heap(Heap.begin(), Heap.end(), Greater); |
| 125 | } |
| 126 | assert(Heap.size() <= N); |
| 127 | assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); |
| 128 | return Dropped; |
| 129 | } |
| 130 | |
| 131 | // Returns candidates from best to worst. |
| 132 | std::vector<value_type> items() && { |
| 133 | std::sort_heap(Heap.begin(), Heap.end(), Greater); |
| 134 | assert(Heap.size() <= N); |
| 135 | return std::move(Heap); |
| 136 | } |
| 137 | |
| 138 | private: |
| 139 | const size_t N; |
| 140 | std::vector<value_type> Heap; // Min-heap, comparator is Greater. |
| 141 | Compare Greater; |
| 142 | }; |
| 143 | |
Ilya Biryukov | 8573def | 2018-05-30 12:41:19 +0000 | [diff] [blame] | 144 | /// Returns a string that sorts in the same order as (-Score, Tiebreak), for |
| 145 | /// LSP. (The highest score compares smallest so it sorts at the top). |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 146 | std::string sortText(float Score, llvm::StringRef Tiebreak = ""); |
| 147 | |
| 148 | } // namespace clangd |
| 149 | } // namespace clang |
| 150 | |
| 151 | #endif |