//===--- Quality.cpp --------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
#include "Quality.h"
#include "FileDistance.h"
#include "URI.h"
#include "index/Index.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Sema/CodeCompleteConsumer.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include <string>
#include <utility>
Sam McCallc5707b62018-05-15 17:43:27 +000024
25namespace clang {
26namespace clangd {
27using namespace llvm;
Sam McCalle018b362018-06-08 09:36:34 +000028static bool IsReserved(StringRef Name) {
29 // FIXME: Should we exclude _Bool and others recognized by the standard?
30 return Name.size() >= 2 && Name[0] == '_' &&
31 (isUppercase(Name[1]) || Name[1] == '_');
32}
Sam McCallc5707b62018-05-15 17:43:27 +000033
Ilya Biryukovf0296462018-06-04 14:50:59 +000034static bool hasDeclInMainFile(const Decl &D) {
35 auto &SourceMgr = D.getASTContext().getSourceManager();
36 for (auto *Redecl : D.redecls()) {
37 auto Loc = SourceMgr.getSpellingLoc(Redecl->getLocation());
38 if (SourceMgr.isWrittenInMainFile(Loc))
39 return true;
40 }
41 return false;
42}
43
Sam McCall4a3c69b2018-06-06 08:53:36 +000044static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
45 class Switch
46 : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
47 public:
48#define MAP(DeclType, Category) \
49 SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
50 return SymbolQualitySignals::Category; \
51 }
52 MAP(NamespaceDecl, Namespace);
53 MAP(NamespaceAliasDecl, Namespace);
54 MAP(TypeDecl, Type);
55 MAP(TypeAliasTemplateDecl, Type);
56 MAP(ClassTemplateDecl, Type);
57 MAP(ValueDecl, Variable);
58 MAP(VarTemplateDecl, Variable);
59 MAP(FunctionDecl, Function);
60 MAP(FunctionTemplateDecl, Function);
61 MAP(Decl, Unknown);
62#undef MAP
63 };
64 return Switch().Visit(&ND);
65}
66
Sam McCallc3b5bad2018-06-14 13:42:21 +000067static SymbolQualitySignals::SymbolCategory categorize(const CodeCompletionResult &R) {
68 if (R.Declaration)
69 return categorize(*R.Declaration);
70 if (R.Kind == CodeCompletionResult::RK_Macro)
71 return SymbolQualitySignals::Macro;
72 // Everything else is a keyword or a pattern. Patterns are mostly keywords
73 // too, except a few which we recognize by cursor kind.
74 switch (R.CursorKind) {
75 case CXCursor_CXXMethod:
76 return SymbolQualitySignals::Function;
77 case CXCursor_ModuleImportDecl:
78 return SymbolQualitySignals::Namespace;
79 case CXCursor_MacroDefinition:
80 return SymbolQualitySignals::Macro;
81 case CXCursor_TypeRef:
82 return SymbolQualitySignals::Type;
83 case CXCursor_MemberRef:
84 return SymbolQualitySignals::Variable;
85 default:
86 return SymbolQualitySignals::Keyword;
87 }
88}
89
Sam McCall4a3c69b2018-06-06 08:53:36 +000090static SymbolQualitySignals::SymbolCategory
91categorize(const index::SymbolInfo &D) {
92 switch (D.Kind) {
93 case index::SymbolKind::Namespace:
94 case index::SymbolKind::NamespaceAlias:
95 return SymbolQualitySignals::Namespace;
96 case index::SymbolKind::Macro:
97 return SymbolQualitySignals::Macro;
98 case index::SymbolKind::Enum:
99 case index::SymbolKind::Struct:
100 case index::SymbolKind::Class:
101 case index::SymbolKind::Protocol:
102 case index::SymbolKind::Extension:
103 case index::SymbolKind::Union:
104 case index::SymbolKind::TypeAlias:
105 return SymbolQualitySignals::Type;
106 case index::SymbolKind::Function:
107 case index::SymbolKind::ClassMethod:
108 case index::SymbolKind::InstanceMethod:
109 case index::SymbolKind::StaticMethod:
110 case index::SymbolKind::InstanceProperty:
111 case index::SymbolKind::ClassProperty:
112 case index::SymbolKind::StaticProperty:
113 case index::SymbolKind::Constructor:
114 case index::SymbolKind::Destructor:
115 case index::SymbolKind::ConversionFunction:
116 return SymbolQualitySignals::Function;
117 case index::SymbolKind::Variable:
118 case index::SymbolKind::Field:
119 case index::SymbolKind::EnumConstant:
120 case index::SymbolKind::Parameter:
121 return SymbolQualitySignals::Variable;
122 case index::SymbolKind::Using:
123 case index::SymbolKind::Module:
124 case index::SymbolKind::Unknown:
125 return SymbolQualitySignals::Unknown;
126 }
Tim Northover0698e962018-06-06 13:28:49 +0000127 llvm_unreachable("Unknown index::SymbolKind");
Sam McCall4a3c69b2018-06-06 08:53:36 +0000128}
129
Sam McCallc5707b62018-05-15 17:43:27 +0000130void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
Sam McCallc5707b62018-05-15 17:43:27 +0000131 if (SemaCCResult.Availability == CXAvailability_Deprecated)
132 Deprecated = true;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000133
Sam McCallc3b5bad2018-06-14 13:42:21 +0000134 Category = categorize(SemaCCResult);
Sam McCalle018b362018-06-08 09:36:34 +0000135
136 if (SemaCCResult.Declaration) {
137 if (auto *ID = SemaCCResult.Declaration->getIdentifier())
138 ReservedName = ReservedName || IsReserved(ID->getName());
139 } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
140 ReservedName = ReservedName || IsReserved(SemaCCResult.Macro->getName());
Sam McCallc5707b62018-05-15 17:43:27 +0000141}
142
143void SymbolQualitySignals::merge(const Symbol &IndexResult) {
144 References = std::max(IndexResult.References, References);
Sam McCall4a3c69b2018-06-06 08:53:36 +0000145 Category = categorize(IndexResult.SymInfo);
Sam McCalle018b362018-06-08 09:36:34 +0000146 ReservedName = ReservedName || IsReserved(IndexResult.Name);
Sam McCallc5707b62018-05-15 17:43:27 +0000147}
148
149float SymbolQualitySignals::evaluate() const {
150 float Score = 1;
151
152 // This avoids a sharp gradient for tail symbols, and also neatly avoids the
153 // question of whether 0 references means a bad symbol or missing data.
Eric Liucdc5f6a2018-06-28 16:51:12 +0000154 if (References >= 10)
155 Score *= std::log10(References);
Sam McCallc5707b62018-05-15 17:43:27 +0000156
Sam McCallc5707b62018-05-15 17:43:27 +0000157 if (Deprecated)
Aaron Ballman215e4712018-05-18 13:18:41 +0000158 Score *= 0.1f;
Sam McCalle018b362018-06-08 09:36:34 +0000159 if (ReservedName)
160 Score *= 0.1f;
Sam McCallc5707b62018-05-15 17:43:27 +0000161
Sam McCall4a3c69b2018-06-06 08:53:36 +0000162 switch (Category) {
Sam McCallabe37372018-06-27 11:43:54 +0000163 case Keyword: // Often relevant, but misses most signals.
164 Score *= 4; // FIXME: important keywords should have specific boosts.
Sam McCallc3b5bad2018-06-14 13:42:21 +0000165 break;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000166 case Type:
167 case Function:
168 case Variable:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000169 Score *= 1.1f;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000170 break;
171 case Namespace:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000172 Score *= 0.8f;
Sam McCallbc7cbb72018-06-06 12:38:37 +0000173 break;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000174 case Macro:
Simon Pilgrim0c9e1c82018-06-06 12:48:27 +0000175 Score *= 0.2f;
Sam McCall4a3c69b2018-06-06 08:53:36 +0000176 break;
177 case Unknown:
178 break;
179 }
180
Sam McCallc5707b62018-05-15 17:43:27 +0000181 return Score;
182}
183
184raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
185 OS << formatv("=== Symbol quality: {0}\n", S.evaluate());
Sam McCallc5707b62018-05-15 17:43:27 +0000186 OS << formatv("\tReferences: {0}\n", S.References);
187 OS << formatv("\tDeprecated: {0}\n", S.Deprecated);
Sam McCalle018b362018-06-08 09:36:34 +0000188 OS << formatv("\tReserved name: {0}\n", S.ReservedName);
Sam McCall4a3c69b2018-06-06 08:53:36 +0000189 OS << formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
Sam McCallc5707b62018-05-15 17:43:27 +0000190 return OS;
191}
192
Sam McCalld9b54f02018-06-05 16:30:25 +0000193static SymbolRelevanceSignals::AccessibleScope
Sam McCallabe37372018-06-27 11:43:54 +0000194ComputeScope(const NamedDecl *D) {
195 // Injected "Foo" within the class "Foo" has file scope, not class scope.
196 const DeclContext *DC = D->getDeclContext();
197 if (auto *R = dyn_cast_or_null<RecordDecl>(D))
198 if (R->isInjectedClassName())
199 DC = DC->getParent();
Eric Liu8944f0e2018-07-05 08:14:04 +0000200 // Class constructor should have the same scope as the class.
201 if (const auto *Ctor = llvm::dyn_cast<CXXConstructorDecl>(D))
202 DC = DC->getParent();
Sam McCall89f52932018-06-05 18:00:48 +0000203 bool InClass = false;
Sam McCallabe37372018-06-27 11:43:54 +0000204 for (; !DC->isFileContext(); DC = DC->getParent()) {
Sam McCalld9b54f02018-06-05 16:30:25 +0000205 if (DC->isFunctionOrMethod())
206 return SymbolRelevanceSignals::FunctionScope;
207 InClass = InClass || DC->isRecord();
208 }
209 if (InClass)
210 return SymbolRelevanceSignals::ClassScope;
211 // This threshold could be tweaked, e.g. to treat module-visible as global.
Sam McCallabe37372018-06-27 11:43:54 +0000212 if (D->getLinkageInternal() < ExternalLinkage)
Sam McCalld9b54f02018-06-05 16:30:25 +0000213 return SymbolRelevanceSignals::FileScope;
214 return SymbolRelevanceSignals::GlobalScope;
215}
216
217void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
218 // FIXME: Index results always assumed to be at global scope. If Scope becomes
219 // relevant to non-completion requests, we should recognize class members etc.
Eric Liu09c3c372018-06-15 08:58:12 +0000220
221 SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
Sam McCalld9b54f02018-06-05 16:30:25 +0000222}
223
Sam McCallc5707b62018-05-15 17:43:27 +0000224void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
225 if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
226 SemaCCResult.Availability == CXAvailability_NotAccessible)
227 Forbidden = true;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000228
229 if (SemaCCResult.Declaration) {
Eric Liu09c3c372018-06-15 08:58:12 +0000230 // We boost things that have decls in the main file. We give a fixed score
231 // for all other declarations in sema as they are already included in the
232 // translation unit.
Ilya Biryukovf0296462018-06-04 14:50:59 +0000233 float DeclProximity =
Eric Liu09c3c372018-06-15 08:58:12 +0000234 hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.6;
235 SemaProximityScore = std::max(DeclProximity, SemaProximityScore);
Ilya Biryukovf0296462018-06-04 14:50:59 +0000236 }
Sam McCalld9b54f02018-06-05 16:30:25 +0000237
238 // Declarations are scoped, others (like macros) are assumed global.
Sam McCall661d89c2018-06-05 17:58:12 +0000239 if (SemaCCResult.Declaration)
Sam McCallabe37372018-06-27 11:43:54 +0000240 Scope = std::min(Scope, ComputeScope(SemaCCResult.Declaration));
Sam McCallc5707b62018-05-15 17:43:27 +0000241}
242
Sam McCall3f0243f2018-07-03 08:09:29 +0000243static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
244 URIDistance *D) {
245 if (!D || SymbolURI.empty())
246 return {0.f, 0u};
247 unsigned Distance = D->distance(SymbolURI);
248 // Assume approximately default options are used for sensible scoring.
249 return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
250}
251
Sam McCallc5707b62018-05-15 17:43:27 +0000252float SymbolRelevanceSignals::evaluate() const {
Sam McCalld9b54f02018-06-05 16:30:25 +0000253 float Score = 1;
254
Sam McCallc5707b62018-05-15 17:43:27 +0000255 if (Forbidden)
256 return 0;
Ilya Biryukovf0296462018-06-04 14:50:59 +0000257
Sam McCalld9b54f02018-06-05 16:30:25 +0000258 Score *= NameMatch;
259
Ilya Biryukovf0296462018-06-04 14:50:59 +0000260 // Proximity scores are [0,1] and we translate them into a multiplier in the
Sam McCall3f0243f2018-07-03 08:09:29 +0000261 // range from 1 to 3.
262 Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first,
263 SemaProximityScore);
Sam McCalld9b54f02018-06-05 16:30:25 +0000264
265 // Symbols like local variables may only be referenced within their scope.
266 // Conversely if we're in that scope, it's likely we'll reference them.
267 if (Query == CodeComplete) {
268 // The narrower the scope where a symbol is visible, the more likely it is
269 // to be relevant when it is available.
270 switch (Scope) {
271 case GlobalScope:
272 break;
273 case FileScope:
274 Score *= 1.5;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000275 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000276 case ClassScope:
277 Score *= 2;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000278 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000279 case FunctionScope:
280 Score *= 4;
Sam McCallc22c9aa2018-06-07 08:16:36 +0000281 break;
Sam McCalld9b54f02018-06-05 16:30:25 +0000282 }
283 }
284
Ilya Biryukovf0296462018-06-04 14:50:59 +0000285 return Score;
Sam McCallc5707b62018-05-15 17:43:27 +0000286}
Eric Liu09c3c372018-06-15 08:58:12 +0000287
Sam McCallc5707b62018-05-15 17:43:27 +0000288raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
289 OS << formatv("=== Symbol relevance: {0}\n", S.evaluate());
290 OS << formatv("\tName match: {0}\n", S.NameMatch);
291 OS << formatv("\tForbidden: {0}\n", S.Forbidden);
Eric Liu09c3c372018-06-15 08:58:12 +0000292 OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
293 if (S.FileProximityMatch) {
Sam McCall3f0243f2018-07-03 08:09:29 +0000294 auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch);
295 OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first,
296 Score.second);
Eric Liu09c3c372018-06-15 08:58:12 +0000297 }
298 OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
Sam McCall661d89c2018-06-05 17:58:12 +0000299 OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
300 OS << formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
Sam McCallc5707b62018-05-15 17:43:27 +0000301 return OS;
302}
303
/// Combines a symbol quality score and a relevance score into one ranking
/// score by multiplying them.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  return SymbolQuality * SymbolRelevance;
}
307
308// Produces an integer that sorts in the same order as F.
309// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
310static uint32_t encodeFloat(float F) {
311 static_assert(std::numeric_limits<float>::is_iec559, "");
312 constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
313
314 // Get the bits of the float. Endianness is the same as for integers.
315 uint32_t U = FloatToBits(F);
316 // IEEE 754 floats compare like sign-magnitude integers.
317 if (U & TopBit) // Negative float.
318 return 0 - U; // Map onto the low half of integers, order reversed.
319 return U + TopBit; // Positive floats map onto the high half of integers.
320}
321
322std::string sortText(float Score, llvm::StringRef Name) {
323 // We convert -Score to an integer, and hex-encode for readability.
324 // Example: [0.5, "foo"] -> "41000000foo"
325 std::string S;
326 llvm::raw_string_ostream OS(S);
327 write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
328 /*Width=*/2 * sizeof(Score));
329 OS << Name;
330 OS.flush();
331 return S;
332}
333
334} // namespace clangd
335} // namespace clang