blob: 237b7d54513e6a2f99de01cc96411f11140c284f [file] [log] [blame]
Sam McCallc5707b62018-05-15 17:43:27 +00001//===--- Quality.cpp --------------------------------------------*- C++-*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9#include "Quality.h"
Sam McCall3f0243f2018-07-03 08:09:29 +000010#include "FileDistance.h"
Eric Liu09c3c372018-06-15 08:58:12 +000011#include "URI.h"
Sam McCallc5707b62018-05-15 17:43:27 +000012#include "index/Index.h"
Ilya Biryukovf0296462018-06-04 14:50:59 +000013#include "clang/AST/ASTContext.h"
Sam McCall4a3c69b2018-06-06 08:53:36 +000014#include "clang/AST/DeclVisitor.h"
Sam McCall3f0243f2018-07-03 08:09:29 +000015#include "clang/Basic/CharInfo.h"
Ilya Biryukovf0296462018-06-04 14:50:59 +000016#include "clang/Basic/SourceManager.h"
Sam McCallc5707b62018-05-15 17:43:27 +000017#include "clang/Sema/CodeCompleteConsumer.h"
18#include "llvm/Support/FormatVariadic.h"
19#include "llvm/Support/MathExtras.h"
20#include "llvm/Support/raw_ostream.h"
Sam McCall3f0243f2018-07-03 08:09:29 +000021#include <cmath>
Sam McCallc5707b62018-05-15 17:43:27 +000022
23namespace clang {
24namespace clangd {
25using namespace llvm;
Sam McCalle018b362018-06-08 09:36:34 +000026static bool IsReserved(StringRef Name) {
27 // FIXME: Should we exclude _Bool and others recognized by the standard?
28 return Name.size() >= 2 && Name[0] == '_' &&
29 (isUppercase(Name[1]) || Name[1] == '_');
30}
Sam McCallc5707b62018-05-15 17:43:27 +000031
Ilya Biryukovf0296462018-06-04 14:50:59 +000032static bool hasDeclInMainFile(const Decl &D) {
33 auto &SourceMgr = D.getASTContext().getSourceManager();
34 for (auto *Redecl : D.redecls()) {
35 auto Loc = SourceMgr.getSpellingLoc(Redecl->getLocation());
36 if (SourceMgr.isWrittenInMainFile(Loc))
37 return true;
38 }
39 return false;
40}
41
Sam McCall4a3c69b2018-06-06 08:53:36 +000042static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
43 class Switch
44 : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
45 public:
46#define MAP(DeclType, Category) \
47 SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
48 return SymbolQualitySignals::Category; \
49 }
50 MAP(NamespaceDecl, Namespace);
51 MAP(NamespaceAliasDecl, Namespace);
52 MAP(TypeDecl, Type);
53 MAP(TypeAliasTemplateDecl, Type);
54 MAP(ClassTemplateDecl, Type);
55 MAP(ValueDecl, Variable);
56 MAP(VarTemplateDecl, Variable);
57 MAP(FunctionDecl, Function);
58 MAP(FunctionTemplateDecl, Function);
59 MAP(Decl, Unknown);
60#undef MAP
61 };
62 return Switch().Visit(&ND);
63}
64
Sam McCallc3b5bad2018-06-14 13:42:21 +000065static SymbolQualitySignals::SymbolCategory categorize(const CodeCompletionResult &R) {
66 if (R.Declaration)
67 return categorize(*R.Declaration);
68 if (R.Kind == CodeCompletionResult::RK_Macro)
69 return SymbolQualitySignals::Macro;
70 // Everything else is a keyword or a pattern. Patterns are mostly keywords
71 // too, except a few which we recognize by cursor kind.
72 switch (R.CursorKind) {
73 case CXCursor_CXXMethod:
74 return SymbolQualitySignals::Function;
75 case CXCursor_ModuleImportDecl:
76 return SymbolQualitySignals::Namespace;
77 case CXCursor_MacroDefinition:
78 return SymbolQualitySignals::Macro;
79 case CXCursor_TypeRef:
80 return SymbolQualitySignals::Type;
81 case CXCursor_MemberRef:
82 return SymbolQualitySignals::Variable;
83 default:
84 return SymbolQualitySignals::Keyword;
85 }
86}
87
Sam McCall4a3c69b2018-06-06 08:53:36 +000088static SymbolQualitySignals::SymbolCategory
89categorize(const index::SymbolInfo &D) {
90 switch (D.Kind) {
91 case index::SymbolKind::Namespace:
92 case index::SymbolKind::NamespaceAlias:
93 return SymbolQualitySignals::Namespace;
94 case index::SymbolKind::Macro:
95 return SymbolQualitySignals::Macro;
96 case index::SymbolKind::Enum:
97 case index::SymbolKind::Struct:
98 case index::SymbolKind::Class:
99 case index::SymbolKind::Protocol:
100 case index::SymbolKind::Extension:
101 case index::SymbolKind::Union:
102 case index::SymbolKind::TypeAlias:
103 return SymbolQualitySignals::Type;
104 case index::SymbolKind::Function:
105 case index::SymbolKind::ClassMethod:
106 case index::SymbolKind::InstanceMethod:
107 case index::SymbolKind::StaticMethod:
108 case index::SymbolKind::InstanceProperty:
109 case index::SymbolKind::ClassProperty:
110 case index::SymbolKind::StaticProperty:
111 case index::SymbolKind::Constructor:
112 case index::SymbolKind::Destructor:
113 case index::SymbolKind::ConversionFunction:
114 return SymbolQualitySignals::Function;
115 case index::SymbolKind::Variable:
116 case index::SymbolKind::Field:
117 case index::SymbolKind::EnumConstant:
118 case index::SymbolKind::Parameter:
119 return SymbolQualitySignals::Variable;
120 case index::SymbolKind::Using:
121 case index::SymbolKind::Module:
122 case index::SymbolKind::Unknown:
123 return SymbolQualitySignals::Unknown;
124 }
Tim Northover0698e962018-06-06 13:28:49 +0000125 llvm_unreachable("Unknown index::SymbolKind");
Sam McCall4a3c69b2018-06-06 08:53:36 +0000126}
127
Sam McCallc5707b62018-05-15 17:43:27 +0000128void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
Sam McCallc5707b62018-05-15 17:43:27 +0000129 if (SemaCCResult.Availability == CXAvailability_Deprecated)
130 Deprecated = true;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000131
Sam McCallc3b5bad2018-06-14 13:42:21 +0000132 Category = categorize(SemaCCResult);
Sam McCalle018b362018-06-08 09:36:34 +0000133
134 if (SemaCCResult.Declaration) {
135 if (auto *ID = SemaCCResult.Declaration->getIdentifier())
136 ReservedName = ReservedName || IsReserved(ID->getName());
137 } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
138 ReservedName = ReservedName || IsReserved(SemaCCResult.Macro->getName());
Sam McCallc5707b62018-05-15 17:43:27 +0000139}
140
141void SymbolQualitySignals::merge(const Symbol &IndexResult) {
142 References = std::max(IndexResult.References, References);
Sam McCall4a3c69b2018-06-06 08:53:36 +0000143 Category = categorize(IndexResult.SymInfo);
Sam McCalle018b362018-06-08 09:36:34 +0000144 ReservedName = ReservedName || IsReserved(IndexResult.Name);
Sam McCallc5707b62018-05-15 17:43:27 +0000145}
146
147float SymbolQualitySignals::evaluate() const {
148 float Score = 1;
149
150 // This avoids a sharp gradient for tail symbols, and also neatly avoids the
151 // question of whether 0 references means a bad symbol or missing data.
Eric Liucdc5f6a2018-06-28 16:51:12 +0000152 if (References >= 10)
153 Score *= std::log10(References);
Sam McCallc5707b62018-05-15 17:43:27 +0000154
Sam McCallc5707b62018-05-15 17:43:27 +0000155 if (Deprecated)
Aaron Ballman215e4712018-05-18 13:18:41 +0000156 Score *= 0.1f;
Sam McCalle018b362018-06-08 09:36:34 +0000157 if (ReservedName)
158 Score *= 0.1f;
Sam McCallc5707b62018-05-15 17:43:27 +0000159
Sam McCall4a3c69b2018-06-06 08:53:36 +0000160 switch (Category) {
Sam McCallabe37372018-06-27 11:43:54 +0000161 case Keyword: // Often relevant, but misses most signals.
162 Score *= 4; // FIXME: important keywords should have specific boosts.
Sam McCallc3b5bad2018-06-14 13:42:21 +0000163 break;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000164 case Type:
165 case Function:
166 case Variable:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000167 Score *= 1.1f;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000168 break;
169 case Namespace:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000170 Score *= 0.8f;
Sam McCallbc7cbb72018-06-06 12:38:37 +0000171 break;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000172 case Macro:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000173 Score *= 0.2f;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000174 break;
175 case Unknown:
176 break;
177 }
178
Sam McCallc5707b62018-05-15 17:43:27 +0000179 return Score;
180}
181
182raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
183 OS << formatv("=== Symbol quality: {0}\n", S.evaluate());
Sam McCallc5707b62018-05-15 17:43:27 +0000184 OS << formatv("\tReferences: {0}\n", S.References);
185 OS << formatv("\tDeprecated: {0}\n", S.Deprecated);
Sam McCalle018b362018-06-08 09:36:34 +0000186 OS << formatv("\tReserved name: {0}\n", S.ReservedName);
Sam McCall4a3c69b2018-06-06 08:53:36 +0000187 OS << formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
Sam McCallc5707b62018-05-15 17:43:27 +0000188 return OS;
189}
190
Sam McCalld9b54f02018-06-05 16:30:25 +0000191static SymbolRelevanceSignals::AccessibleScope
Sam McCallabe37372018-06-27 11:43:54 +0000192ComputeScope(const NamedDecl *D) {
193 // Injected "Foo" within the class "Foo" has file scope, not class scope.
194 const DeclContext *DC = D->getDeclContext();
195 if (auto *R = dyn_cast_or_null<RecordDecl>(D))
196 if (R->isInjectedClassName())
197 DC = DC->getParent();
Sam McCall89f52932018-06-05 18:00:48 +0000198 bool InClass = false;
Sam McCallabe37372018-06-27 11:43:54 +0000199 for (; !DC->isFileContext(); DC = DC->getParent()) {
Sam McCalld9b54f02018-06-05 16:30:25 +0000200 if (DC->isFunctionOrMethod())
201 return SymbolRelevanceSignals::FunctionScope;
202 InClass = InClass || DC->isRecord();
203 }
204 if (InClass)
205 return SymbolRelevanceSignals::ClassScope;
206 // This threshold could be tweaked, e.g. to treat module-visible as global.
Sam McCallabe37372018-06-27 11:43:54 +0000207 if (D->getLinkageInternal() < ExternalLinkage)
Sam McCalld9b54f02018-06-05 16:30:25 +0000208 return SymbolRelevanceSignals::FileScope;
209 return SymbolRelevanceSignals::GlobalScope;
210}
211
212void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
213 // FIXME: Index results always assumed to be at global scope. If Scope becomes
214 // relevant to non-completion requests, we should recognize class members etc.
Eric Liu09c3c372018-06-15 08:58:12 +0000215
216 SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
Sam McCalld9b54f02018-06-05 16:30:25 +0000217}
218
Sam McCallc5707b62018-05-15 17:43:27 +0000219void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
220 if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
221 SemaCCResult.Availability == CXAvailability_NotAccessible)
222 Forbidden = true;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000223
224 if (SemaCCResult.Declaration) {
Eric Liu09c3c372018-06-15 08:58:12 +0000225 // We boost things that have decls in the main file. We give a fixed score
226 // for all other declarations in sema as they are already included in the
227 // translation unit.
Ilya Biryukovf0296462018-06-04 14:50:59 +0000228 float DeclProximity =
Eric Liu09c3c372018-06-15 08:58:12 +0000229 hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.6;
230 SemaProximityScore = std::max(DeclProximity, SemaProximityScore);
Ilya Biryukovf0296462018-06-04 14:50:59 +0000231 }
Sam McCalld9b54f02018-06-05 16:30:25 +0000232
233 // Declarations are scoped, others (like macros) are assumed global.
Sam McCall661d89c2018-06-05 17:58:12 +0000234 if (SemaCCResult.Declaration)
Sam McCallabe37372018-06-27 11:43:54 +0000235 Scope = std::min(Scope, ComputeScope(SemaCCResult.Declaration));
Sam McCallc5707b62018-05-15 17:43:27 +0000236}
237
Sam McCall3f0243f2018-07-03 08:09:29 +0000238static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
239 URIDistance *D) {
240 if (!D || SymbolURI.empty())
241 return {0.f, 0u};
242 unsigned Distance = D->distance(SymbolURI);
243 // Assume approximately default options are used for sensible scoring.
244 return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
245}
246
Sam McCallc5707b62018-05-15 17:43:27 +0000247float SymbolRelevanceSignals::evaluate() const {
Sam McCalld9b54f02018-06-05 16:30:25 +0000248 float Score = 1;
249
Sam McCallc5707b62018-05-15 17:43:27 +0000250 if (Forbidden)
251 return 0;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000252
Sam McCalld9b54f02018-06-05 16:30:25 +0000253 Score *= NameMatch;
254
Ilya Biryukovf0296462018-06-04 14:50:59 +0000255 // Proximity scores are [0,1] and we translate them into a multiplier in the
Sam McCall3f0243f2018-07-03 08:09:29 +0000256 // range from 1 to 3.
257 Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first,
258 SemaProximityScore);
Sam McCalld9b54f02018-06-05 16:30:25 +0000259
260 // Symbols like local variables may only be referenced within their scope.
261 // Conversely if we're in that scope, it's likely we'll reference them.
262 if (Query == CodeComplete) {
263 // The narrower the scope where a symbol is visible, the more likely it is
264 // to be relevant when it is available.
265 switch (Scope) {
266 case GlobalScope:
267 break;
268 case FileScope:
269 Score *= 1.5;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000270 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000271 case ClassScope:
272 Score *= 2;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000273 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000274 case FunctionScope:
275 Score *= 4;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000276 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000277 }
278 }
279
Ilya Biryukovf0296462018-06-04 14:50:59 +0000280 return Score;
Sam McCallc5707b62018-05-15 17:43:27 +0000281}
Eric Liu09c3c372018-06-15 08:58:12 +0000282
Sam McCallc5707b62018-05-15 17:43:27 +0000283raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
284 OS << formatv("=== Symbol relevance: {0}\n", S.evaluate());
285 OS << formatv("\tName match: {0}\n", S.NameMatch);
286 OS << formatv("\tForbidden: {0}\n", S.Forbidden);
Eric Liu09c3c372018-06-15 08:58:12 +0000287 OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
288 if (S.FileProximityMatch) {
Sam McCall3f0243f2018-07-03 08:09:29 +0000289 auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch);
290 OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first,
291 Score.second);
Eric Liu09c3c372018-06-15 08:58:12 +0000292 }
293 OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
Sam McCall661d89c2018-06-05 17:58:12 +0000294 OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
295 OS << formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
Sam McCallc5707b62018-05-15 17:43:27 +0000296 return OS;
297}
298
299float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
300 return SymbolQuality * SymbolRelevance;
301}
302
303// Produces an integer that sorts in the same order as F.
304// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
305static uint32_t encodeFloat(float F) {
306 static_assert(std::numeric_limits<float>::is_iec559, "");
307 constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
308
309 // Get the bits of the float. Endianness is the same as for integers.
310 uint32_t U = FloatToBits(F);
311 // IEEE 754 floats compare like sign-magnitude integers.
312 if (U & TopBit) // Negative float.
313 return 0 - U; // Map onto the low half of integers, order reversed.
314 return U + TopBit; // Positive floats map onto the high half of integers.
315}
316
317std::string sortText(float Score, llvm::StringRef Name) {
318 // We convert -Score to an integer, and hex-encode for readability.
319 // Example: [0.5, "foo"] -> "41000000foo"
320 std::string S;
321 llvm::raw_string_ostream OS(S);
322 write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
323 /*Width=*/2 * sizeof(Score));
324 OS << Name;
325 OS.flush();
326 return S;
327}
328
329} // namespace clangd
330} // namespace clang