blob: 3e3103ff2e3b322d4604cb999fb8912f8b1647dd [file] [log] [blame]
Sam McCallc5707b62018-05-15 17:43:27 +00001//===--- Quality.cpp --------------------------------------------*- C++-*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9#include "Quality.h"
Eric Liucdc5f6a2018-06-28 16:51:12 +000010#include <cmath>
Eric Liu09c3c372018-06-15 08:58:12 +000011#include "URI.h"
Sam McCallc5707b62018-05-15 17:43:27 +000012#include "index/Index.h"
Ilya Biryukovf0296462018-06-04 14:50:59 +000013#include "clang/AST/ASTContext.h"
Sam McCalle018b362018-06-08 09:36:34 +000014#include "clang/Basic/CharInfo.h"
Sam McCall4a3c69b2018-06-06 08:53:36 +000015#include "clang/AST/DeclVisitor.h"
Ilya Biryukovf0296462018-06-04 14:50:59 +000016#include "clang/Basic/SourceManager.h"
Sam McCallc5707b62018-05-15 17:43:27 +000017#include "clang/Sema/CodeCompleteConsumer.h"
18#include "llvm/Support/FormatVariadic.h"
19#include "llvm/Support/MathExtras.h"
20#include "llvm/Support/raw_ostream.h"
21
22namespace clang {
23namespace clangd {
24using namespace llvm;
Sam McCalle018b362018-06-08 09:36:34 +000025static bool IsReserved(StringRef Name) {
26 // FIXME: Should we exclude _Bool and others recognized by the standard?
27 return Name.size() >= 2 && Name[0] == '_' &&
28 (isUppercase(Name[1]) || Name[1] == '_');
29}
Sam McCallc5707b62018-05-15 17:43:27 +000030
Ilya Biryukovf0296462018-06-04 14:50:59 +000031static bool hasDeclInMainFile(const Decl &D) {
32 auto &SourceMgr = D.getASTContext().getSourceManager();
33 for (auto *Redecl : D.redecls()) {
34 auto Loc = SourceMgr.getSpellingLoc(Redecl->getLocation());
35 if (SourceMgr.isWrittenInMainFile(Loc))
36 return true;
37 }
38 return false;
39}
40
Sam McCall4a3c69b2018-06-06 08:53:36 +000041static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
42 class Switch
43 : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
44 public:
45#define MAP(DeclType, Category) \
46 SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
47 return SymbolQualitySignals::Category; \
48 }
49 MAP(NamespaceDecl, Namespace);
50 MAP(NamespaceAliasDecl, Namespace);
51 MAP(TypeDecl, Type);
52 MAP(TypeAliasTemplateDecl, Type);
53 MAP(ClassTemplateDecl, Type);
54 MAP(ValueDecl, Variable);
55 MAP(VarTemplateDecl, Variable);
56 MAP(FunctionDecl, Function);
57 MAP(FunctionTemplateDecl, Function);
58 MAP(Decl, Unknown);
59#undef MAP
60 };
61 return Switch().Visit(&ND);
62}
63
Sam McCallc3b5bad2018-06-14 13:42:21 +000064static SymbolQualitySignals::SymbolCategory categorize(const CodeCompletionResult &R) {
65 if (R.Declaration)
66 return categorize(*R.Declaration);
67 if (R.Kind == CodeCompletionResult::RK_Macro)
68 return SymbolQualitySignals::Macro;
69 // Everything else is a keyword or a pattern. Patterns are mostly keywords
70 // too, except a few which we recognize by cursor kind.
71 switch (R.CursorKind) {
72 case CXCursor_CXXMethod:
73 return SymbolQualitySignals::Function;
74 case CXCursor_ModuleImportDecl:
75 return SymbolQualitySignals::Namespace;
76 case CXCursor_MacroDefinition:
77 return SymbolQualitySignals::Macro;
78 case CXCursor_TypeRef:
79 return SymbolQualitySignals::Type;
80 case CXCursor_MemberRef:
81 return SymbolQualitySignals::Variable;
82 default:
83 return SymbolQualitySignals::Keyword;
84 }
85}
86
Sam McCall4a3c69b2018-06-06 08:53:36 +000087static SymbolQualitySignals::SymbolCategory
88categorize(const index::SymbolInfo &D) {
89 switch (D.Kind) {
90 case index::SymbolKind::Namespace:
91 case index::SymbolKind::NamespaceAlias:
92 return SymbolQualitySignals::Namespace;
93 case index::SymbolKind::Macro:
94 return SymbolQualitySignals::Macro;
95 case index::SymbolKind::Enum:
96 case index::SymbolKind::Struct:
97 case index::SymbolKind::Class:
98 case index::SymbolKind::Protocol:
99 case index::SymbolKind::Extension:
100 case index::SymbolKind::Union:
101 case index::SymbolKind::TypeAlias:
102 return SymbolQualitySignals::Type;
103 case index::SymbolKind::Function:
104 case index::SymbolKind::ClassMethod:
105 case index::SymbolKind::InstanceMethod:
106 case index::SymbolKind::StaticMethod:
107 case index::SymbolKind::InstanceProperty:
108 case index::SymbolKind::ClassProperty:
109 case index::SymbolKind::StaticProperty:
110 case index::SymbolKind::Constructor:
111 case index::SymbolKind::Destructor:
112 case index::SymbolKind::ConversionFunction:
113 return SymbolQualitySignals::Function;
114 case index::SymbolKind::Variable:
115 case index::SymbolKind::Field:
116 case index::SymbolKind::EnumConstant:
117 case index::SymbolKind::Parameter:
118 return SymbolQualitySignals::Variable;
119 case index::SymbolKind::Using:
120 case index::SymbolKind::Module:
121 case index::SymbolKind::Unknown:
122 return SymbolQualitySignals::Unknown;
123 }
Tim Northover0698e962018-06-06 13:28:49 +0000124 llvm_unreachable("Unknown index::SymbolKind");
Sam McCall4a3c69b2018-06-06 08:53:36 +0000125}
126
Sam McCallc5707b62018-05-15 17:43:27 +0000127void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
Sam McCallc5707b62018-05-15 17:43:27 +0000128 if (SemaCCResult.Availability == CXAvailability_Deprecated)
129 Deprecated = true;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000130
Sam McCallc3b5bad2018-06-14 13:42:21 +0000131 Category = categorize(SemaCCResult);
Sam McCalle018b362018-06-08 09:36:34 +0000132
133 if (SemaCCResult.Declaration) {
134 if (auto *ID = SemaCCResult.Declaration->getIdentifier())
135 ReservedName = ReservedName || IsReserved(ID->getName());
136 } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
137 ReservedName = ReservedName || IsReserved(SemaCCResult.Macro->getName());
Sam McCallc5707b62018-05-15 17:43:27 +0000138}
139
140void SymbolQualitySignals::merge(const Symbol &IndexResult) {
141 References = std::max(IndexResult.References, References);
Sam McCall4a3c69b2018-06-06 08:53:36 +0000142 Category = categorize(IndexResult.SymInfo);
Sam McCalle018b362018-06-08 09:36:34 +0000143 ReservedName = ReservedName || IsReserved(IndexResult.Name);
Sam McCallc5707b62018-05-15 17:43:27 +0000144}
145
146float SymbolQualitySignals::evaluate() const {
147 float Score = 1;
148
149 // This avoids a sharp gradient for tail symbols, and also neatly avoids the
150 // question of whether 0 references means a bad symbol or missing data.
Eric Liucdc5f6a2018-06-28 16:51:12 +0000151 if (References >= 10)
152 Score *= std::log10(References);
Sam McCallc5707b62018-05-15 17:43:27 +0000153
Sam McCallc5707b62018-05-15 17:43:27 +0000154 if (Deprecated)
Aaron Ballman215e4712018-05-18 13:18:41 +0000155 Score *= 0.1f;
Sam McCalle018b362018-06-08 09:36:34 +0000156 if (ReservedName)
157 Score *= 0.1f;
Sam McCallc5707b62018-05-15 17:43:27 +0000158
Sam McCall4a3c69b2018-06-06 08:53:36 +0000159 switch (Category) {
Sam McCallabe37372018-06-27 11:43:54 +0000160 case Keyword: // Often relevant, but misses most signals.
161 Score *= 4; // FIXME: important keywords should have specific boosts.
Sam McCallc3b5bad2018-06-14 13:42:21 +0000162 break;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000163 case Type:
164 case Function:
165 case Variable:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000166 Score *= 1.1f;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000167 break;
168 case Namespace:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000169 Score *= 0.8f;
Sam McCallbc7cbb72018-06-06 12:38:37 +0000170 break;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000171 case Macro:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000172 Score *= 0.2f;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000173 break;
174 case Unknown:
175 break;
176 }
177
Sam McCallc5707b62018-05-15 17:43:27 +0000178 return Score;
179}
180
181raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
182 OS << formatv("=== Symbol quality: {0}\n", S.evaluate());
Sam McCallc5707b62018-05-15 17:43:27 +0000183 OS << formatv("\tReferences: {0}\n", S.References);
184 OS << formatv("\tDeprecated: {0}\n", S.Deprecated);
Sam McCalle018b362018-06-08 09:36:34 +0000185 OS << formatv("\tReserved name: {0}\n", S.ReservedName);
Sam McCall4a3c69b2018-06-06 08:53:36 +0000186 OS << formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
Sam McCallc5707b62018-05-15 17:43:27 +0000187 return OS;
188}
189
Eric Liu09c3c372018-06-15 08:58:12 +0000190/// Calculates a proximity score from \p From and \p To, which are URI strings
191/// that have the same scheme. This does not parse URI. A URI (sans "<scheme>:")
192/// is split into chunks by '/' and each chunk is considered a file/directory.
193/// For example, "uri:///a/b/c" will be treated as /a/b/c
194static float uriProximity(StringRef From, StringRef To) {
195 auto SchemeSplitFrom = From.split(':');
196 auto SchemeSplitTo = To.split(':');
197 assert((SchemeSplitFrom.first == SchemeSplitTo.first) &&
198 "URIs must have the same scheme in order to compute proximity.");
199 auto Split = [](StringRef URIWithoutScheme) {
200 SmallVector<StringRef, 8> Split;
201 URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
202 return Split;
203 };
204 SmallVector<StringRef, 8> Fs = Split(SchemeSplitFrom.second);
205 SmallVector<StringRef, 8> Ts = Split(SchemeSplitTo.second);
206 auto F = Fs.begin(), T = Ts.begin(), FE = Fs.end(), TE = Ts.end();
207 for (; F != FE && T != TE && *F == *T; ++F, ++T) {
208 }
209 // We penalize for traversing up and down from \p From to \p To but penalize
210 // less for traversing down because subprojects are more closely related than
211 // superprojects.
212 int UpDist = FE - F;
213 int DownDist = TE - T;
214 return std::pow(0.7, UpDist + DownDist/2);
215}
216
/// Constructs a matcher from the given paths. The constructor parameter
/// shadows the member of the same name; the member is initialized from the
/// parameter's [begin, end) range.
FileProximityMatcher::FileProximityMatcher(ArrayRef<StringRef> ProximityPaths)
    : ProximityPaths(ProximityPaths.begin(), ProximityPaths.end()) {}
219
220float FileProximityMatcher::uriProximity(StringRef SymbolURI) const {
221 float Score = 0;
222 if (!ProximityPaths.empty() && !SymbolURI.empty()) {
223 for (const auto &Path : ProximityPaths)
224 // Only calculate proximity score for two URIs with the same scheme so
225 // that the computation can be purely text-based and thus avoid expensive
226 // URI encoding/decoding.
227 if (auto U = URI::create(Path, SymbolURI.split(':').first)) {
228 Score = std::max(Score, clangd::uriProximity(U->toString(), SymbolURI));
229 } else {
230 llvm::consumeError(U.takeError());
231 }
232 }
233 return Score;
234}
235
236llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
237 const FileProximityMatcher &M) {
238 OS << formatv("File proximity matcher: ");
Eric Liuffaaf7d2018-06-21 09:51:28 +0000239 OS << formatv("ProximityPaths[{0}]", llvm::join(M.ProximityPaths.begin(),
Eric Liu09c3c372018-06-15 08:58:12 +0000240 M.ProximityPaths.end(), ","));
241 return OS;
242}
243
Sam McCalld9b54f02018-06-05 16:30:25 +0000244static SymbolRelevanceSignals::AccessibleScope
Sam McCallabe37372018-06-27 11:43:54 +0000245ComputeScope(const NamedDecl *D) {
246 // Injected "Foo" within the class "Foo" has file scope, not class scope.
247 const DeclContext *DC = D->getDeclContext();
248 if (auto *R = dyn_cast_or_null<RecordDecl>(D))
249 if (R->isInjectedClassName())
250 DC = DC->getParent();
Sam McCall89f52932018-06-05 18:00:48 +0000251 bool InClass = false;
Sam McCallabe37372018-06-27 11:43:54 +0000252 for (; !DC->isFileContext(); DC = DC->getParent()) {
Sam McCalld9b54f02018-06-05 16:30:25 +0000253 if (DC->isFunctionOrMethod())
254 return SymbolRelevanceSignals::FunctionScope;
255 InClass = InClass || DC->isRecord();
256 }
257 if (InClass)
258 return SymbolRelevanceSignals::ClassScope;
259 // This threshold could be tweaked, e.g. to treat module-visible as global.
Sam McCallabe37372018-06-27 11:43:54 +0000260 if (D->getLinkageInternal() < ExternalLinkage)
Sam McCalld9b54f02018-06-05 16:30:25 +0000261 return SymbolRelevanceSignals::FileScope;
262 return SymbolRelevanceSignals::GlobalScope;
263}
264
265void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
266 // FIXME: Index results always assumed to be at global scope. If Scope becomes
267 // relevant to non-completion requests, we should recognize class members etc.
Eric Liu09c3c372018-06-15 08:58:12 +0000268
269 SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
Sam McCalld9b54f02018-06-05 16:30:25 +0000270}
271
Sam McCallc5707b62018-05-15 17:43:27 +0000272void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
273 if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
274 SemaCCResult.Availability == CXAvailability_NotAccessible)
275 Forbidden = true;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000276
277 if (SemaCCResult.Declaration) {
Eric Liu09c3c372018-06-15 08:58:12 +0000278 // We boost things that have decls in the main file. We give a fixed score
279 // for all other declarations in sema as they are already included in the
280 // translation unit.
Ilya Biryukovf0296462018-06-04 14:50:59 +0000281 float DeclProximity =
Eric Liu09c3c372018-06-15 08:58:12 +0000282 hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.6;
283 SemaProximityScore = std::max(DeclProximity, SemaProximityScore);
Ilya Biryukovf0296462018-06-04 14:50:59 +0000284 }
Sam McCalld9b54f02018-06-05 16:30:25 +0000285
286 // Declarations are scoped, others (like macros) are assumed global.
Sam McCall661d89c2018-06-05 17:58:12 +0000287 if (SemaCCResult.Declaration)
Sam McCallabe37372018-06-27 11:43:54 +0000288 Scope = std::min(Scope, ComputeScope(SemaCCResult.Declaration));
Sam McCallc5707b62018-05-15 17:43:27 +0000289}
290
291float SymbolRelevanceSignals::evaluate() const {
Sam McCalld9b54f02018-06-05 16:30:25 +0000292 float Score = 1;
293
Sam McCallc5707b62018-05-15 17:43:27 +0000294 if (Forbidden)
295 return 0;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000296
Sam McCalld9b54f02018-06-05 16:30:25 +0000297 Score *= NameMatch;
298
Eric Liu09c3c372018-06-15 08:58:12 +0000299 float IndexProximityScore =
300 FileProximityMatch ? FileProximityMatch->uriProximity(SymbolURI) : 0;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000301 // Proximity scores are [0,1] and we translate them into a multiplier in the
302 // range from 1 to 2.
Eric Liu09c3c372018-06-15 08:58:12 +0000303 Score *= 1 + std::max(IndexProximityScore, SemaProximityScore);
Sam McCalld9b54f02018-06-05 16:30:25 +0000304
305 // Symbols like local variables may only be referenced within their scope.
306 // Conversely if we're in that scope, it's likely we'll reference them.
307 if (Query == CodeComplete) {
308 // The narrower the scope where a symbol is visible, the more likely it is
309 // to be relevant when it is available.
310 switch (Scope) {
311 case GlobalScope:
312 break;
313 case FileScope:
314 Score *= 1.5;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000315 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000316 case ClassScope:
317 Score *= 2;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000318 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000319 case FunctionScope:
320 Score *= 4;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000321 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000322 }
323 }
324
Ilya Biryukovf0296462018-06-04 14:50:59 +0000325 return Score;
Sam McCallc5707b62018-05-15 17:43:27 +0000326}
Eric Liu09c3c372018-06-15 08:58:12 +0000327
Sam McCallc5707b62018-05-15 17:43:27 +0000328raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
329 OS << formatv("=== Symbol relevance: {0}\n", S.evaluate());
330 OS << formatv("\tName match: {0}\n", S.NameMatch);
331 OS << formatv("\tForbidden: {0}\n", S.Forbidden);
Eric Liu09c3c372018-06-15 08:58:12 +0000332 OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
333 if (S.FileProximityMatch) {
Eric Liuffaaf7d2018-06-21 09:51:28 +0000334 OS << "\tIndex proximity: "
335 << S.FileProximityMatch->uriProximity(S.SymbolURI) << " ("
336 << *S.FileProximityMatch << ")\n";
Eric Liu09c3c372018-06-15 08:58:12 +0000337 }
338 OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
Sam McCall661d89c2018-06-05 17:58:12 +0000339 OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
340 OS << formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
Sam McCallc5707b62018-05-15 17:43:27 +0000341 return OS;
342}
343
/// Combines a symbol quality score and a relevance score into one value by
/// multiplying them.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  const float Combined = SymbolQuality * SymbolRelevance;
  return Combined;
}
347
348// Produces an integer that sorts in the same order as F.
349// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
350static uint32_t encodeFloat(float F) {
351 static_assert(std::numeric_limits<float>::is_iec559, "");
352 constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
353
354 // Get the bits of the float. Endianness is the same as for integers.
355 uint32_t U = FloatToBits(F);
356 // IEEE 754 floats compare like sign-magnitude integers.
357 if (U & TopBit) // Negative float.
358 return 0 - U; // Map onto the low half of integers, order reversed.
359 return U + TopBit; // Positive floats map onto the high half of integers.
360}
361
362std::string sortText(float Score, llvm::StringRef Name) {
363 // We convert -Score to an integer, and hex-encode for readability.
364 // Example: [0.5, "foo"] -> "41000000foo"
365 std::string S;
366 llvm::raw_string_ostream OS(S);
367 write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
368 /*Width=*/2 * sizeof(Score));
369 OS << Name;
370 OS.flush();
371 return S;
372}
373
374} // namespace clangd
375} // namespace clang