Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 1 | //===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===---------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// Some operations such as code completion produce a set of candidates. |
| 11 | /// Usually the user can choose between them, but we should put the best options |
| 12 | /// at the top (they're easier to select, and more likely to be seen). |
| 13 | /// |
| 14 | /// This file defines building blocks for ranking candidates. |
| 15 | /// It's used by the features directly and also in the implementation of |
| 16 | /// indexes, as indexes also need to heuristically limit their results. |
| 17 | /// |
| 18 | /// The facilities here are: |
| 19 | /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString |
| 20 | /// These are structured in a way that they can be debugged, and are fairly |
| 21 | /// consistent regardless of the source. |
| 22 | /// - compute scores from scoring signals. These are suitable for sorting. |
| 23 | /// - sorting utilities like the TopN container. |
| 24 | /// These could be split up further to isolate dependencies if we care. |
| 25 | /// |
| 26 | //===---------------------------------------------------------------------===// |
| 27 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H |
| 28 | #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H |
| 29 | #include "llvm/ADT/StringRef.h" |
| 30 | #include <algorithm> |
| 31 | #include <functional> |
| 32 | #include <vector> |
| 33 | namespace llvm { |
| 34 | class raw_ostream; |
| 35 | } |
| 36 | namespace clang { |
| 37 | class CodeCompletionResult; |
| 38 | namespace clangd { |
| 39 | struct Symbol; |
| 40 | |
| 41 | // Signals structs are designed to be aggregated from 0 or more sources. |
| 42 | // A default instance has neutral signals, and sources are merged into it. |
| 43 | // They can be dumped for debugging, and evaluate()d into a score. |
| 44 | |
| 45 | /// Attributes of a symbol that affect how much we like it. |
| 46 | struct SymbolQualitySignals { |
| 47 | unsigned SemaCCPriority = 0; // 1-80, 1 is best. 0 means absent. |
| 48 | // FIXME: this is actually a mix of symbol |
| 49 | // quality and relevance. Untangle this. |
| 50 | bool Deprecated = false; |
| 51 | unsigned References = 0; |
| 52 | |
| 53 | void merge(const CodeCompletionResult &SemaCCResult); |
| 54 | void merge(const Symbol &IndexResult); |
| 55 | |
| 56 | // Condense these signals down to a single number, higher is better. |
| 57 | float evaluate() const; |
| 58 | }; |
| 59 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 60 | const SymbolQualitySignals &); |
| 61 | |
| 62 | /// Attributes of a symbol-query pair that affect how much we like it. |
| 63 | struct SymbolRelevanceSignals { |
| 64 | // 0-1 fuzzy-match score for unqualified name. Must be explicitly assigned. |
| 65 | float NameMatch = 1; |
| 66 | bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). |
| 67 | |
| 68 | void merge(const CodeCompletionResult &SemaResult); |
| 69 | |
| 70 | // Condense these signals down to a single number, higher is better. |
| 71 | float evaluate() const; |
| 72 | }; |
| 73 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 74 | const SymbolRelevanceSignals &); |
| 75 | |
| 76 | /// Combine symbol quality and relevance into a single score. |
| 77 | float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); |
| 78 | |
| 79 | /// TopN<T> is a lossy container that preserves only the "best" N elements. |
| 80 | template <typename T, typename Compare = std::greater<T>> class TopN { |
| 81 | public: |
| 82 | using value_type = T; |
| 83 | TopN(size_t N, Compare Greater = Compare()) |
| 84 | : N(N), Greater(std::move(Greater)) {} |
| 85 | |
| 86 | // Adds a candidate to the set. |
| 87 | // Returns true if a candidate was dropped to get back under N. |
| 88 | bool push(value_type &&V) { |
| 89 | bool Dropped = false; |
| 90 | if (Heap.size() >= N) { |
| 91 | Dropped = true; |
| 92 | if (N > 0 && Greater(V, Heap.front())) { |
| 93 | std::pop_heap(Heap.begin(), Heap.end(), Greater); |
| 94 | Heap.back() = std::move(V); |
| 95 | std::push_heap(Heap.begin(), Heap.end(), Greater); |
| 96 | } |
| 97 | } else { |
| 98 | Heap.push_back(std::move(V)); |
| 99 | std::push_heap(Heap.begin(), Heap.end(), Greater); |
| 100 | } |
| 101 | assert(Heap.size() <= N); |
| 102 | assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); |
| 103 | return Dropped; |
| 104 | } |
| 105 | |
| 106 | // Returns candidates from best to worst. |
| 107 | std::vector<value_type> items() && { |
| 108 | std::sort_heap(Heap.begin(), Heap.end(), Greater); |
| 109 | assert(Heap.size() <= N); |
| 110 | return std::move(Heap); |
| 111 | } |
| 112 | |
| 113 | private: |
| 114 | const size_t N; |
| 115 | std::vector<value_type> Heap; // Min-heap, comparator is Greater. |
| 116 | Compare Greater; |
| 117 | }; |
| 118 | |
Ilya Biryukov | 8573def | 2018-05-30 12:41:19 +0000 | [diff] [blame^] | 119 | /// Returns a string that sorts in the same order as (-Score, Tiebreak), for |
| 120 | /// LSP. (The highest score compares smallest so it sorts at the top). |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 121 | std::string sortText(float Score, llvm::StringRef Tiebreak = ""); |
| 122 | |
| 123 | } // namespace clangd |
| 124 | } // namespace clang |
| 125 | |
| 126 | #endif |