blob: 0aed496be6a01626e662ef64f1a8f7833cc07299 [file] [log] [blame]
Sam McCallc5707b62018-05-15 17:43:27 +00001//===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9///
10/// Some operations such as code completion produce a set of candidates.
11/// Usually the user can choose between them, but we should put the best options
12/// at the top (they're easier to select, and more likely to be seen).
13///
14/// This file defines building blocks for ranking candidates.
15/// It's used by the features directly and also in the implementation of
16/// indexes, as indexes also need to heuristically limit their results.
17///
18/// The facilities here are:
19/// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
20/// These are structured in a way that they can be debugged, and are fairly
21/// consistent regardless of the source.
22/// - compute scores from scoring signals. These are suitable for sorting.
23/// - sorting utilities like the TopN container.
24/// These could be split up further to isolate dependencies if we care.
25///
26//===---------------------------------------------------------------------===//
27#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
28#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
Eric Liu5d2a8072018-07-23 10:56:37 +000029#include "clang/Sema/CodeCompleteConsumer.h"
Eric Liu09c3c372018-06-15 08:58:12 +000030#include "llvm/ADT/ArrayRef.h"
Sam McCallc5707b62018-05-15 17:43:27 +000031#include "llvm/ADT/StringRef.h"
32#include <algorithm>
33#include <functional>
34#include <vector>
35namespace llvm {
36class raw_ostream;
37}
38namespace clang {
39class CodeCompletionResult;
40namespace clangd {
41struct Symbol;
Sam McCall3f0243f2018-07-03 08:09:29 +000042class URIDistance;
Sam McCallc5707b62018-05-15 17:43:27 +000043
44// Signals structs are designed to be aggregated from 0 or more sources.
45// A default instance has neutral signals, and sources are merged into it.
46// They can be dumped for debugging, and evaluate()d into a score.
47
48/// Attributes of a symbol that affect how much we like it.
49struct SymbolQualitySignals {
Sam McCallc5707b62018-05-15 17:43:27 +000050 bool Deprecated = false;
Sam McCalle018b362018-06-08 09:36:34 +000051 bool ReservedName = false; // __foo, _Foo are usually implementation details.
52 // FIXME: make these findable once user types _.
Sam McCallc5707b62018-05-15 17:43:27 +000053 unsigned References = 0;
54
Sam McCall4a3c69b2018-06-06 08:53:36 +000055 enum SymbolCategory {
Sam McCallc3b5bad2018-06-14 13:42:21 +000056 Unknown = 0,
Sam McCall4a3c69b2018-06-06 08:53:36 +000057 Variable,
58 Macro,
59 Type,
60 Function,
Eric Liud7de8112018-07-24 08:51:52 +000061 Constructor,
Sam McCall4a3c69b2018-06-06 08:53:36 +000062 Namespace,
Sam McCallc3b5bad2018-06-14 13:42:21 +000063 Keyword,
Sam McCall4a3c69b2018-06-06 08:53:36 +000064 } Category = Unknown;
65
Sam McCallc5707b62018-05-15 17:43:27 +000066 void merge(const CodeCompletionResult &SemaCCResult);
67 void merge(const Symbol &IndexResult);
68
69 // Condense these signals down to a single number, higher is better.
70 float evaluate() const;
71};
72llvm::raw_ostream &operator<<(llvm::raw_ostream &,
73 const SymbolQualitySignals &);
74
75/// Attributes of a symbol-query pair that affect how much we like it.
76struct SymbolRelevanceSignals {
Sam McCallbc7cbb72018-06-06 12:38:37 +000077 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
Sam McCallc5707b62018-05-15 17:43:27 +000078 float NameMatch = 1;
79 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
Eric Liu09c3c372018-06-15 08:58:12 +000080
Sam McCall3f0243f2018-07-03 08:09:29 +000081 URIDistance *FileProximityMatch = nullptr;
Eric Liu09c3c372018-06-15 08:58:12 +000082 /// This is used to calculate proximity between the index symbol and the
83 /// query.
84 llvm::StringRef SymbolURI;
Sam McCalldb41e1c2018-06-05 12:22:43 +000085 /// Proximity between best declaration and the query. [0-1], 1 is closest.
Eric Liu09c3c372018-06-15 08:58:12 +000086 /// FIXME: unify with index proximity score - signals should be
87 /// source-independent.
88 float SemaProximityScore = 0;
Sam McCallc5707b62018-05-15 17:43:27 +000089
Sam McCalld9b54f02018-06-05 16:30:25 +000090 // An approximate measure of where we expect the symbol to be used.
91 enum AccessibleScope {
92 FunctionScope,
93 ClassScope,
94 FileScope,
95 GlobalScope,
96 } Scope = GlobalScope;
97
98 enum QueryType {
99 CodeComplete,
100 Generic,
101 } Query = Generic;
102
Eric Liu5d2a8072018-07-23 10:56:37 +0000103 CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
104
105 // Whether symbol is an instance member of a class.
106 bool IsInstanceMember = false;
107
Sam McCallc5707b62018-05-15 17:43:27 +0000108 void merge(const CodeCompletionResult &SemaResult);
Sam McCalld9b54f02018-06-05 16:30:25 +0000109 void merge(const Symbol &IndexResult);
Sam McCallc5707b62018-05-15 17:43:27 +0000110
111 // Condense these signals down to a single number, higher is better.
112 float evaluate() const;
113};
114llvm::raw_ostream &operator<<(llvm::raw_ostream &,
115 const SymbolRelevanceSignals &);
116
117/// Combine symbol quality and relevance into a single score.
118float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
119
120/// TopN<T> is a lossy container that preserves only the "best" N elements.
121template <typename T, typename Compare = std::greater<T>> class TopN {
122public:
123 using value_type = T;
124 TopN(size_t N, Compare Greater = Compare())
125 : N(N), Greater(std::move(Greater)) {}
126
127 // Adds a candidate to the set.
128 // Returns true if a candidate was dropped to get back under N.
129 bool push(value_type &&V) {
130 bool Dropped = false;
131 if (Heap.size() >= N) {
132 Dropped = true;
133 if (N > 0 && Greater(V, Heap.front())) {
134 std::pop_heap(Heap.begin(), Heap.end(), Greater);
135 Heap.back() = std::move(V);
136 std::push_heap(Heap.begin(), Heap.end(), Greater);
137 }
138 } else {
139 Heap.push_back(std::move(V));
140 std::push_heap(Heap.begin(), Heap.end(), Greater);
141 }
142 assert(Heap.size() <= N);
143 assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
144 return Dropped;
145 }
146
147 // Returns candidates from best to worst.
148 std::vector<value_type> items() && {
149 std::sort_heap(Heap.begin(), Heap.end(), Greater);
150 assert(Heap.size() <= N);
151 return std::move(Heap);
152 }
153
154private:
155 const size_t N;
156 std::vector<value_type> Heap; // Min-heap, comparator is Greater.
157 Compare Greater;
158};
159
Ilya Biryukov8573def2018-05-30 12:41:19 +0000160/// Returns a string that sorts in the same order as (-Score, Tiebreak), for
161/// LSP. (The highest score compares smallest so it sorts at the top).
Sam McCallc5707b62018-05-15 17:43:27 +0000162std::string sortText(float Score, llvm::StringRef Tiebreak = "");
163
164} // namespace clangd
165} // namespace clang
166
167#endif