blob: 99731d9408a27c8ee14a0c3ea7c2675844c15fa7 [file] [log] [blame]
Sam McCallc5707b62018-05-15 17:43:27 +00001//===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9///
10/// Some operations such as code completion produce a set of candidates.
11/// Usually the user can choose between them, but we should put the best options
12/// at the top (they're easier to select, and more likely to be seen).
13///
14/// This file defines building blocks for ranking candidates.
15/// It's used by the features directly and also in the implementation of
16/// indexes, as indexes also need to heuristically limit their results.
17///
18/// The facilities here are:
19/// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
20/// These are structured in a way that they can be debugged, and are fairly
21/// consistent regardless of the source.
22/// - compute scores from scoring signals. These are suitable for sorting.
23/// - sorting utilities like the TopN container.
24/// These could be split up further to isolate dependencies if we care.
25///
26//===---------------------------------------------------------------------===//
27#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
28#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
29#include "llvm/ADT/StringRef.h"
30#include <algorithm>
31#include <functional>
32#include <vector>
33namespace llvm {
34class raw_ostream;
35}
36namespace clang {
37class CodeCompletionResult;
38namespace clangd {
39struct Symbol;
40
41// Signals structs are designed to be aggregated from 0 or more sources.
42// A default instance has neutral signals, and sources are merged into it.
43// They can be dumped for debugging, and evaluate()d into a score.
44
45/// Attributes of a symbol that affect how much we like it.
46struct SymbolQualitySignals {
47 unsigned SemaCCPriority = 0; // 1-80, 1 is best. 0 means absent.
48 // FIXME: this is actually a mix of symbol
49 // quality and relevance. Untangle this.
50 bool Deprecated = false;
51 unsigned References = 0;
52
Sam McCall4a3c69b2018-06-06 08:53:36 +000053 enum SymbolCategory {
54 Variable,
55 Macro,
56 Type,
57 Function,
58 Namespace,
59 Unknown,
60 } Category = Unknown;
61
Sam McCallc5707b62018-05-15 17:43:27 +000062 void merge(const CodeCompletionResult &SemaCCResult);
63 void merge(const Symbol &IndexResult);
64
65 // Condense these signals down to a single number, higher is better.
66 float evaluate() const;
67};
68llvm::raw_ostream &operator<<(llvm::raw_ostream &,
69 const SymbolQualitySignals &);
70
71/// Attributes of a symbol-query pair that affect how much we like it.
72struct SymbolRelevanceSignals {
Sam McCallbc7cbb72018-06-06 12:38:37 +000073 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
Sam McCallc5707b62018-05-15 17:43:27 +000074 float NameMatch = 1;
75 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
Sam McCalldb41e1c2018-06-05 12:22:43 +000076 /// Proximity between best declaration and the query. [0-1], 1 is closest.
Ilya Biryukovf0296462018-06-04 14:50:59 +000077 float ProximityScore = 0;
Sam McCallc5707b62018-05-15 17:43:27 +000078
Sam McCalld9b54f02018-06-05 16:30:25 +000079 // An approximate measure of where we expect the symbol to be used.
80 enum AccessibleScope {
81 FunctionScope,
82 ClassScope,
83 FileScope,
84 GlobalScope,
85 } Scope = GlobalScope;
86
87 enum QueryType {
88 CodeComplete,
89 Generic,
90 } Query = Generic;
91
Sam McCallc5707b62018-05-15 17:43:27 +000092 void merge(const CodeCompletionResult &SemaResult);
Sam McCalld9b54f02018-06-05 16:30:25 +000093 void merge(const Symbol &IndexResult);
Sam McCallc5707b62018-05-15 17:43:27 +000094
95 // Condense these signals down to a single number, higher is better.
96 float evaluate() const;
97};
98llvm::raw_ostream &operator<<(llvm::raw_ostream &,
99 const SymbolRelevanceSignals &);
100
101/// Combine symbol quality and relevance into a single score.
102float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
103
104/// TopN<T> is a lossy container that preserves only the "best" N elements.
105template <typename T, typename Compare = std::greater<T>> class TopN {
106public:
107 using value_type = T;
108 TopN(size_t N, Compare Greater = Compare())
109 : N(N), Greater(std::move(Greater)) {}
110
111 // Adds a candidate to the set.
112 // Returns true if a candidate was dropped to get back under N.
113 bool push(value_type &&V) {
114 bool Dropped = false;
115 if (Heap.size() >= N) {
116 Dropped = true;
117 if (N > 0 && Greater(V, Heap.front())) {
118 std::pop_heap(Heap.begin(), Heap.end(), Greater);
119 Heap.back() = std::move(V);
120 std::push_heap(Heap.begin(), Heap.end(), Greater);
121 }
122 } else {
123 Heap.push_back(std::move(V));
124 std::push_heap(Heap.begin(), Heap.end(), Greater);
125 }
126 assert(Heap.size() <= N);
127 assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
128 return Dropped;
129 }
130
131 // Returns candidates from best to worst.
132 std::vector<value_type> items() && {
133 std::sort_heap(Heap.begin(), Heap.end(), Greater);
134 assert(Heap.size() <= N);
135 return std::move(Heap);
136 }
137
138private:
139 const size_t N;
140 std::vector<value_type> Heap; // Min-heap, comparator is Greater.
141 Compare Greater;
142};
143
Ilya Biryukov8573def2018-05-30 12:41:19 +0000144/// Returns a string that sorts in the same order as (-Score, Tiebreak), for
145/// LSP. (The highest score compares smallest so it sorts at the top).
Sam McCallc5707b62018-05-15 17:43:27 +0000146std::string sortText(float Score, llvm::StringRef Tiebreak = "");
147
148} // namespace clangd
149} // namespace clang
150
151#endif