blob: 61195b5666063a4681e9ab76b9faed14b29a2953 [file] [log] [blame]
Sam McCallc5707b62018-05-15 17:43:27 +00001//===--- Quality.cpp --------------------------------------------*- C++-*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9#include "Quality.h"
Sam McCall3f0243f2018-07-03 08:09:29 +000010#include "FileDistance.h"
Eric Liu09c3c372018-06-15 08:58:12 +000011#include "URI.h"
Sam McCallc5707b62018-05-15 17:43:27 +000012#include "index/Index.h"
Ilya Biryukovf0296462018-06-04 14:50:59 +000013#include "clang/AST/ASTContext.h"
Eric Liu5d2a8072018-07-23 10:56:37 +000014#include "clang/AST/Decl.h"
Eric Liu8944f0e2018-07-05 08:14:04 +000015#include "clang/AST/DeclCXX.h"
Eric Liu5d2a8072018-07-23 10:56:37 +000016#include "clang/AST/DeclTemplate.h"
Sam McCall4a3c69b2018-06-06 08:53:36 +000017#include "clang/AST/DeclVisitor.h"
Sam McCall3f0243f2018-07-03 08:09:29 +000018#include "clang/Basic/CharInfo.h"
Ilya Biryukovf0296462018-06-04 14:50:59 +000019#include "clang/Basic/SourceManager.h"
Sam McCallc5707b62018-05-15 17:43:27 +000020#include "clang/Sema/CodeCompleteConsumer.h"
Eric Liu8944f0e2018-07-05 08:14:04 +000021#include "llvm/Support/Casting.h"
Sam McCallc5707b62018-05-15 17:43:27 +000022#include "llvm/Support/FormatVariadic.h"
23#include "llvm/Support/MathExtras.h"
24#include "llvm/Support/raw_ostream.h"
Sam McCall3f0243f2018-07-03 08:09:29 +000025#include <cmath>
Sam McCallc5707b62018-05-15 17:43:27 +000026
27namespace clang {
28namespace clangd {
29using namespace llvm;
Ilya Biryukov74f26552018-07-26 12:05:31 +000030static bool isReserved(StringRef Name) {
Sam McCalle018b362018-06-08 09:36:34 +000031 // FIXME: Should we exclude _Bool and others recognized by the standard?
32 return Name.size() >= 2 && Name[0] == '_' &&
33 (isUppercase(Name[1]) || Name[1] == '_');
34}
Sam McCallc5707b62018-05-15 17:43:27 +000035
Ilya Biryukovf0296462018-06-04 14:50:59 +000036static bool hasDeclInMainFile(const Decl &D) {
37 auto &SourceMgr = D.getASTContext().getSourceManager();
38 for (auto *Redecl : D.redecls()) {
39 auto Loc = SourceMgr.getSpellingLoc(Redecl->getLocation());
40 if (SourceMgr.isWrittenInMainFile(Loc))
41 return true;
42 }
43 return false;
44}
45
Kirill Bobyrev47d7f522018-07-11 14:49:49 +000046static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) {
47 const auto &Context = R.Declaration->getASTContext();
48 const auto &SourceMgr = Context.getSourceManager();
49 if (R.ShadowDecl) {
50 const auto Loc = SourceMgr.getExpansionLoc(R.ShadowDecl->getLocation());
51 if (SourceMgr.isWrittenInMainFile(Loc))
52 return true;
53 }
54 return false;
55}
56
Sam McCall4a3c69b2018-06-06 08:53:36 +000057static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
58 class Switch
59 : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
60 public:
61#define MAP(DeclType, Category) \
62 SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
63 return SymbolQualitySignals::Category; \
64 }
65 MAP(NamespaceDecl, Namespace);
66 MAP(NamespaceAliasDecl, Namespace);
67 MAP(TypeDecl, Type);
68 MAP(TypeAliasTemplateDecl, Type);
69 MAP(ClassTemplateDecl, Type);
Eric Liud7de8112018-07-24 08:51:52 +000070 MAP(CXXConstructorDecl, Constructor);
Sam McCall4a3c69b2018-06-06 08:53:36 +000071 MAP(ValueDecl, Variable);
72 MAP(VarTemplateDecl, Variable);
73 MAP(FunctionDecl, Function);
74 MAP(FunctionTemplateDecl, Function);
75 MAP(Decl, Unknown);
76#undef MAP
77 };
78 return Switch().Visit(&ND);
79}
80
Kirill Bobyrev7cf29bc2018-07-05 09:37:26 +000081static SymbolQualitySignals::SymbolCategory
82categorize(const CodeCompletionResult &R) {
Sam McCallc3b5bad2018-06-14 13:42:21 +000083 if (R.Declaration)
84 return categorize(*R.Declaration);
85 if (R.Kind == CodeCompletionResult::RK_Macro)
86 return SymbolQualitySignals::Macro;
87 // Everything else is a keyword or a pattern. Patterns are mostly keywords
88 // too, except a few which we recognize by cursor kind.
89 switch (R.CursorKind) {
Kirill Bobyrev7cf29bc2018-07-05 09:37:26 +000090 case CXCursor_CXXMethod:
91 return SymbolQualitySignals::Function;
92 case CXCursor_ModuleImportDecl:
93 return SymbolQualitySignals::Namespace;
94 case CXCursor_MacroDefinition:
95 return SymbolQualitySignals::Macro;
96 case CXCursor_TypeRef:
97 return SymbolQualitySignals::Type;
98 case CXCursor_MemberRef:
99 return SymbolQualitySignals::Variable;
Eric Liud7de8112018-07-24 08:51:52 +0000100 case CXCursor_Constructor:
101 return SymbolQualitySignals::Constructor;
Kirill Bobyrev7cf29bc2018-07-05 09:37:26 +0000102 default:
103 return SymbolQualitySignals::Keyword;
Sam McCallc3b5bad2018-06-14 13:42:21 +0000104 }
105}
106
Sam McCall4a3c69b2018-06-06 08:53:36 +0000107static SymbolQualitySignals::SymbolCategory
108categorize(const index::SymbolInfo &D) {
109 switch (D.Kind) {
Kirill Bobyrev7cf29bc2018-07-05 09:37:26 +0000110 case index::SymbolKind::Namespace:
111 case index::SymbolKind::NamespaceAlias:
112 return SymbolQualitySignals::Namespace;
113 case index::SymbolKind::Macro:
114 return SymbolQualitySignals::Macro;
115 case index::SymbolKind::Enum:
116 case index::SymbolKind::Struct:
117 case index::SymbolKind::Class:
118 case index::SymbolKind::Protocol:
119 case index::SymbolKind::Extension:
120 case index::SymbolKind::Union:
121 case index::SymbolKind::TypeAlias:
122 return SymbolQualitySignals::Type;
123 case index::SymbolKind::Function:
124 case index::SymbolKind::ClassMethod:
125 case index::SymbolKind::InstanceMethod:
126 case index::SymbolKind::StaticMethod:
127 case index::SymbolKind::InstanceProperty:
128 case index::SymbolKind::ClassProperty:
129 case index::SymbolKind::StaticProperty:
Kirill Bobyrev7cf29bc2018-07-05 09:37:26 +0000130 case index::SymbolKind::Destructor:
131 case index::SymbolKind::ConversionFunction:
132 return SymbolQualitySignals::Function;
Eric Liud7de8112018-07-24 08:51:52 +0000133 case index::SymbolKind::Constructor:
134 return SymbolQualitySignals::Constructor;
Kirill Bobyrev7cf29bc2018-07-05 09:37:26 +0000135 case index::SymbolKind::Variable:
136 case index::SymbolKind::Field:
137 case index::SymbolKind::EnumConstant:
138 case index::SymbolKind::Parameter:
139 return SymbolQualitySignals::Variable;
140 case index::SymbolKind::Using:
141 case index::SymbolKind::Module:
142 case index::SymbolKind::Unknown:
143 return SymbolQualitySignals::Unknown;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000144 }
Tim Northover0698e962018-06-06 13:28:49 +0000145 llvm_unreachable("Unknown index::SymbolKind");
Sam McCall4a3c69b2018-06-06 08:53:36 +0000146}
147
Eric Liu5d2a8072018-07-23 10:56:37 +0000148static bool isInstanceMember(const NamedDecl *ND) {
149 if (!ND)
150 return false;
151 if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND))
152 ND = TP->TemplateDecl::getTemplatedDecl();
153 if (const auto *CM = dyn_cast<CXXMethodDecl>(ND))
154 return !CM->isStatic();
155 return isa<FieldDecl>(ND); // Note that static fields are VarDecl.
156}
157
158static bool isInstanceMember(const index::SymbolInfo &D) {
159 switch (D.Kind) {
160 case index::SymbolKind::InstanceMethod:
161 case index::SymbolKind::InstanceProperty:
162 case index::SymbolKind::Field:
163 return true;
164 default:
165 return false;
166 }
167}
168
Sam McCallc5707b62018-05-15 17:43:27 +0000169void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
Sam McCallc5707b62018-05-15 17:43:27 +0000170 if (SemaCCResult.Availability == CXAvailability_Deprecated)
171 Deprecated = true;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000172
Sam McCallc3b5bad2018-06-14 13:42:21 +0000173 Category = categorize(SemaCCResult);
Sam McCalle018b362018-06-08 09:36:34 +0000174
175 if (SemaCCResult.Declaration) {
176 if (auto *ID = SemaCCResult.Declaration->getIdentifier())
Ilya Biryukov74f26552018-07-26 12:05:31 +0000177 ReservedName = ReservedName || isReserved(ID->getName());
Sam McCalle018b362018-06-08 09:36:34 +0000178 } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
Ilya Biryukov74f26552018-07-26 12:05:31 +0000179 ReservedName = ReservedName || isReserved(SemaCCResult.Macro->getName());
Sam McCallc5707b62018-05-15 17:43:27 +0000180}
181
182void SymbolQualitySignals::merge(const Symbol &IndexResult) {
183 References = std::max(IndexResult.References, References);
Sam McCall4a3c69b2018-06-06 08:53:36 +0000184 Category = categorize(IndexResult.SymInfo);
Ilya Biryukov74f26552018-07-26 12:05:31 +0000185 ReservedName = ReservedName || isReserved(IndexResult.Name);
Sam McCallc5707b62018-05-15 17:43:27 +0000186}
187
188float SymbolQualitySignals::evaluate() const {
189 float Score = 1;
190
191 // This avoids a sharp gradient for tail symbols, and also neatly avoids the
192 // question of whether 0 references means a bad symbol or missing data.
Eric Liu84bd5db2018-07-25 11:26:35 +0000193 if (References >= 10) {
194 // Use a sigmoid style boosting function, which flats out nicely for large
195 // numbers (e.g. 2.58 for 1M refererences).
196 // The following boosting function is equivalent to:
197 // m = 0.06
198 // f = 12.0
199 // boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59
200 // Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82),
201 // (10K, 2.21), (100K, 2.58), (1M, 2.94)
Ilya Biryukov74f26552018-07-26 12:05:31 +0000202 float S = std::pow(References, -0.06);
203 Score *= 6.0 * (1 - S) / (1 + S) + 0.59;
Eric Liu84bd5db2018-07-25 11:26:35 +0000204 }
Sam McCallc5707b62018-05-15 17:43:27 +0000205
Sam McCallc5707b62018-05-15 17:43:27 +0000206 if (Deprecated)
Aaron Ballman215e4712018-05-18 13:18:41 +0000207 Score *= 0.1f;
Sam McCalle018b362018-06-08 09:36:34 +0000208 if (ReservedName)
209 Score *= 0.1f;
Sam McCallc5707b62018-05-15 17:43:27 +0000210
Sam McCall4a3c69b2018-06-06 08:53:36 +0000211 switch (Category) {
Kirill Bobyrev7cf29bc2018-07-05 09:37:26 +0000212 case Keyword: // Often relevant, but misses most signals.
213 Score *= 4; // FIXME: important keywords should have specific boosts.
214 break;
215 case Type:
216 case Function:
217 case Variable:
218 Score *= 1.1f;
219 break;
220 case Namespace:
221 Score *= 0.8f;
222 break;
223 case Macro:
224 Score *= 0.2f;
225 break;
226 case Unknown:
Eric Liud7de8112018-07-24 08:51:52 +0000227 case Constructor: // No boost constructors so they are after class types.
Kirill Bobyrev7cf29bc2018-07-05 09:37:26 +0000228 break;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000229 }
230
Sam McCallc5707b62018-05-15 17:43:27 +0000231 return Score;
232}
233
234raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
235 OS << formatv("=== Symbol quality: {0}\n", S.evaluate());
Sam McCallc5707b62018-05-15 17:43:27 +0000236 OS << formatv("\tReferences: {0}\n", S.References);
237 OS << formatv("\tDeprecated: {0}\n", S.Deprecated);
Sam McCalle018b362018-06-08 09:36:34 +0000238 OS << formatv("\tReserved name: {0}\n", S.ReservedName);
Sam McCall4a3c69b2018-06-06 08:53:36 +0000239 OS << formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
Sam McCallc5707b62018-05-15 17:43:27 +0000240 return OS;
241}
242
Sam McCalld9b54f02018-06-05 16:30:25 +0000243static SymbolRelevanceSignals::AccessibleScope
Ilya Biryukov74f26552018-07-26 12:05:31 +0000244computeScope(const NamedDecl *D) {
Sam McCallabe37372018-06-27 11:43:54 +0000245 // Injected "Foo" within the class "Foo" has file scope, not class scope.
246 const DeclContext *DC = D->getDeclContext();
247 if (auto *R = dyn_cast_or_null<RecordDecl>(D))
248 if (R->isInjectedClassName())
249 DC = DC->getParent();
Eric Liu8944f0e2018-07-05 08:14:04 +0000250 // Class constructor should have the same scope as the class.
Simon Pilgrim4a032012018-07-05 09:35:12 +0000251 if (isa<CXXConstructorDecl>(D))
Eric Liu8944f0e2018-07-05 08:14:04 +0000252 DC = DC->getParent();
Sam McCall89f52932018-06-05 18:00:48 +0000253 bool InClass = false;
Sam McCallabe37372018-06-27 11:43:54 +0000254 for (; !DC->isFileContext(); DC = DC->getParent()) {
Sam McCalld9b54f02018-06-05 16:30:25 +0000255 if (DC->isFunctionOrMethod())
256 return SymbolRelevanceSignals::FunctionScope;
257 InClass = InClass || DC->isRecord();
258 }
259 if (InClass)
260 return SymbolRelevanceSignals::ClassScope;
261 // This threshold could be tweaked, e.g. to treat module-visible as global.
Sam McCallabe37372018-06-27 11:43:54 +0000262 if (D->getLinkageInternal() < ExternalLinkage)
Sam McCalld9b54f02018-06-05 16:30:25 +0000263 return SymbolRelevanceSignals::FileScope;
264 return SymbolRelevanceSignals::GlobalScope;
265}
266
267void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
268 // FIXME: Index results always assumed to be at global scope. If Scope becomes
269 // relevant to non-completion requests, we should recognize class members etc.
Eric Liu09c3c372018-06-15 08:58:12 +0000270
271 SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
Eric Liu5d2a8072018-07-23 10:56:37 +0000272 IsInstanceMember |= isInstanceMember(IndexResult.SymInfo);
Sam McCalld9b54f02018-06-05 16:30:25 +0000273}
274
Sam McCallc5707b62018-05-15 17:43:27 +0000275void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
276 if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
277 SemaCCResult.Availability == CXAvailability_NotAccessible)
278 Forbidden = true;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000279
280 if (SemaCCResult.Declaration) {
Eric Liu09c3c372018-06-15 08:58:12 +0000281 // We boost things that have decls in the main file. We give a fixed score
282 // for all other declarations in sema as they are already included in the
283 // translation unit.
Kirill Bobyrev47d7f522018-07-11 14:49:49 +0000284 float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) ||
285 hasUsingDeclInMainFile(SemaCCResult))
286 ? 1.0
287 : 0.6;
Eric Liu09c3c372018-06-15 08:58:12 +0000288 SemaProximityScore = std::max(DeclProximity, SemaProximityScore);
Eric Liu5d2a8072018-07-23 10:56:37 +0000289 IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration);
Ilya Biryukovf0296462018-06-04 14:50:59 +0000290 }
Sam McCalld9b54f02018-06-05 16:30:25 +0000291
292 // Declarations are scoped, others (like macros) are assumed global.
Sam McCall661d89c2018-06-05 17:58:12 +0000293 if (SemaCCResult.Declaration)
Ilya Biryukov74f26552018-07-26 12:05:31 +0000294 Scope = std::min(Scope, computeScope(SemaCCResult.Declaration));
Sam McCallc5707b62018-05-15 17:43:27 +0000295}
296
Sam McCall3f0243f2018-07-03 08:09:29 +0000297static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
298 URIDistance *D) {
299 if (!D || SymbolURI.empty())
300 return {0.f, 0u};
301 unsigned Distance = D->distance(SymbolURI);
302 // Assume approximately default options are used for sensible scoring.
303 return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
304}
305
Sam McCallc5707b62018-05-15 17:43:27 +0000306float SymbolRelevanceSignals::evaluate() const {
Sam McCalld9b54f02018-06-05 16:30:25 +0000307 float Score = 1;
308
Sam McCallc5707b62018-05-15 17:43:27 +0000309 if (Forbidden)
310 return 0;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000311
Sam McCalld9b54f02018-06-05 16:30:25 +0000312 Score *= NameMatch;
313
Ilya Biryukovf0296462018-06-04 14:50:59 +0000314 // Proximity scores are [0,1] and we translate them into a multiplier in the
Sam McCall3f0243f2018-07-03 08:09:29 +0000315 // range from 1 to 3.
316 Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first,
317 SemaProximityScore);
Sam McCalld9b54f02018-06-05 16:30:25 +0000318
319 // Symbols like local variables may only be referenced within their scope.
320 // Conversely if we're in that scope, it's likely we'll reference them.
321 if (Query == CodeComplete) {
322 // The narrower the scope where a symbol is visible, the more likely it is
323 // to be relevant when it is available.
324 switch (Scope) {
325 case GlobalScope:
326 break;
327 case FileScope:
328 Score *= 1.5;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000329 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000330 case ClassScope:
331 Score *= 2;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000332 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000333 case FunctionScope:
334 Score *= 4;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000335 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000336 }
337 }
338
Eric Liu5d2a8072018-07-23 10:56:37 +0000339 // Penalize non-instance members when they are accessed via a class instance.
340 if (!IsInstanceMember &&
341 (Context == CodeCompletionContext::CCC_DotMemberAccess ||
342 Context == CodeCompletionContext::CCC_ArrowMemberAccess)) {
343 Score *= 0.5;
344 }
345
Ilya Biryukovf0296462018-06-04 14:50:59 +0000346 return Score;
Sam McCallc5707b62018-05-15 17:43:27 +0000347}
Eric Liu09c3c372018-06-15 08:58:12 +0000348
Sam McCallc5707b62018-05-15 17:43:27 +0000349raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
350 OS << formatv("=== Symbol relevance: {0}\n", S.evaluate());
351 OS << formatv("\tName match: {0}\n", S.NameMatch);
352 OS << formatv("\tForbidden: {0}\n", S.Forbidden);
Eric Liu5d2a8072018-07-23 10:56:37 +0000353 OS << formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
354 OS << formatv("\tContext: {0}\n", getCompletionKindString(S.Context));
Eric Liu09c3c372018-06-15 08:58:12 +0000355 OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
356 if (S.FileProximityMatch) {
Sam McCall3f0243f2018-07-03 08:09:29 +0000357 auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch);
358 OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first,
359 Score.second);
Eric Liu09c3c372018-06-15 08:58:12 +0000360 }
361 OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
Sam McCall661d89c2018-06-05 17:58:12 +0000362 OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
363 OS << formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
Sam McCallc5707b62018-05-15 17:43:27 +0000364 return OS;
365}
366
// Combines a symbol's quality and relevance scores into a single value by
// multiplying them.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  const float Combined = SymbolQuality * SymbolRelevance;
  return Combined;
}
370
371// Produces an integer that sorts in the same order as F.
372// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
373static uint32_t encodeFloat(float F) {
374 static_assert(std::numeric_limits<float>::is_iec559, "");
375 constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
376
377 // Get the bits of the float. Endianness is the same as for integers.
378 uint32_t U = FloatToBits(F);
379 // IEEE 754 floats compare like sign-magnitude integers.
380 if (U & TopBit) // Negative float.
381 return 0 - U; // Map onto the low half of integers, order reversed.
382 return U + TopBit; // Positive floats map onto the high half of integers.
383}
384
385std::string sortText(float Score, llvm::StringRef Name) {
386 // We convert -Score to an integer, and hex-encode for readability.
387 // Example: [0.5, "foo"] -> "41000000foo"
388 std::string S;
389 llvm::raw_string_ostream OS(S);
390 write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
391 /*Width=*/2 * sizeof(Score));
392 OS << Name;
393 OS.flush();
394 return S;
395}
396
397} // namespace clangd
398} // namespace clang