Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 1 | //===--- Quality.cpp --------------------------------------------*- C++-*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===---------------------------------------------------------------------===// |
| 9 | #include "Quality.h" |
| 10 | #include "index/Index.h" |
Ilya Biryukov | f029646 | 2018-06-04 14:50:59 +0000 | [diff] [blame] | 11 | #include "clang/AST/ASTContext.h" |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 12 | #include "clang/AST/DeclVisitor.h" |
Ilya Biryukov | f029646 | 2018-06-04 14:50:59 +0000 | [diff] [blame] | 13 | #include "clang/Basic/SourceManager.h" |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 14 | #include "clang/Sema/CodeCompleteConsumer.h" |
| 15 | #include "llvm/Support/FormatVariadic.h" |
| 16 | #include "llvm/Support/MathExtras.h" |
| 17 | #include "llvm/Support/raw_ostream.h" |
| 18 | |
| 19 | namespace clang { |
| 20 | namespace clangd { |
| 21 | using namespace llvm; |
| 22 | |
Ilya Biryukov | f029646 | 2018-06-04 14:50:59 +0000 | [diff] [blame] | 23 | static bool hasDeclInMainFile(const Decl &D) { |
| 24 | auto &SourceMgr = D.getASTContext().getSourceManager(); |
| 25 | for (auto *Redecl : D.redecls()) { |
| 26 | auto Loc = SourceMgr.getSpellingLoc(Redecl->getLocation()); |
| 27 | if (SourceMgr.isWrittenInMainFile(Loc)) |
| 28 | return true; |
| 29 | } |
| 30 | return false; |
| 31 | } |
| 32 | |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 33 | static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) { |
| 34 | class Switch |
| 35 | : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> { |
| 36 | public: |
| 37 | #define MAP(DeclType, Category) \ |
| 38 | SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \ |
| 39 | return SymbolQualitySignals::Category; \ |
| 40 | } |
| 41 | MAP(NamespaceDecl, Namespace); |
| 42 | MAP(NamespaceAliasDecl, Namespace); |
| 43 | MAP(TypeDecl, Type); |
| 44 | MAP(TypeAliasTemplateDecl, Type); |
| 45 | MAP(ClassTemplateDecl, Type); |
| 46 | MAP(ValueDecl, Variable); |
| 47 | MAP(VarTemplateDecl, Variable); |
| 48 | MAP(FunctionDecl, Function); |
| 49 | MAP(FunctionTemplateDecl, Function); |
| 50 | MAP(Decl, Unknown); |
| 51 | #undef MAP |
| 52 | }; |
| 53 | return Switch().Visit(&ND); |
| 54 | } |
| 55 | |
| 56 | static SymbolQualitySignals::SymbolCategory |
| 57 | categorize(const index::SymbolInfo &D) { |
| 58 | switch (D.Kind) { |
| 59 | case index::SymbolKind::Namespace: |
| 60 | case index::SymbolKind::NamespaceAlias: |
| 61 | return SymbolQualitySignals::Namespace; |
| 62 | case index::SymbolKind::Macro: |
| 63 | return SymbolQualitySignals::Macro; |
| 64 | case index::SymbolKind::Enum: |
| 65 | case index::SymbolKind::Struct: |
| 66 | case index::SymbolKind::Class: |
| 67 | case index::SymbolKind::Protocol: |
| 68 | case index::SymbolKind::Extension: |
| 69 | case index::SymbolKind::Union: |
| 70 | case index::SymbolKind::TypeAlias: |
| 71 | return SymbolQualitySignals::Type; |
| 72 | case index::SymbolKind::Function: |
| 73 | case index::SymbolKind::ClassMethod: |
| 74 | case index::SymbolKind::InstanceMethod: |
| 75 | case index::SymbolKind::StaticMethod: |
| 76 | case index::SymbolKind::InstanceProperty: |
| 77 | case index::SymbolKind::ClassProperty: |
| 78 | case index::SymbolKind::StaticProperty: |
| 79 | case index::SymbolKind::Constructor: |
| 80 | case index::SymbolKind::Destructor: |
| 81 | case index::SymbolKind::ConversionFunction: |
| 82 | return SymbolQualitySignals::Function; |
| 83 | case index::SymbolKind::Variable: |
| 84 | case index::SymbolKind::Field: |
| 85 | case index::SymbolKind::EnumConstant: |
| 86 | case index::SymbolKind::Parameter: |
| 87 | return SymbolQualitySignals::Variable; |
| 88 | case index::SymbolKind::Using: |
| 89 | case index::SymbolKind::Module: |
| 90 | case index::SymbolKind::Unknown: |
| 91 | return SymbolQualitySignals::Unknown; |
| 92 | } |
Simon Pilgrim | 0c9e1c8 | 2018-06-06 12:48:27 +0000 | [diff] [blame^] | 93 | llvm_unreachable("Unknown index::SymbolKind") |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 94 | } |
| 95 | |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 96 | void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) { |
| 97 | SemaCCPriority = SemaCCResult.Priority; |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 98 | if (SemaCCResult.Availability == CXAvailability_Deprecated) |
| 99 | Deprecated = true; |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 100 | |
| 101 | if (SemaCCResult.Declaration) |
| 102 | Category = categorize(*SemaCCResult.Declaration); |
| 103 | else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro) |
| 104 | Category = Macro; |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 105 | } |
| 106 | |
| 107 | void SymbolQualitySignals::merge(const Symbol &IndexResult) { |
| 108 | References = std::max(IndexResult.References, References); |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 109 | Category = categorize(IndexResult.SymInfo); |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 110 | } |
| 111 | |
| 112 | float SymbolQualitySignals::evaluate() const { |
| 113 | float Score = 1; |
| 114 | |
| 115 | // This avoids a sharp gradient for tail symbols, and also neatly avoids the |
| 116 | // question of whether 0 references means a bad symbol or missing data. |
| 117 | if (References >= 3) |
| 118 | Score *= std::log(References); |
| 119 | |
| 120 | if (SemaCCPriority) |
| 121 | // Map onto a 0-2 interval, so we don't reward/penalize non-Sema results. |
| 122 | // Priority 80 is a really bad score. |
| 123 | Score *= 2 - std::min<float>(80, SemaCCPriority) / 40; |
| 124 | |
| 125 | if (Deprecated) |
Aaron Ballman | 215e471 | 2018-05-18 13:18:41 +0000 | [diff] [blame] | 126 | Score *= 0.1f; |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 127 | |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 128 | switch (Category) { |
| 129 | case Type: |
| 130 | case Function: |
| 131 | case Variable: |
Simon Pilgrim | 0c9e1c8 | 2018-06-06 12:48:27 +0000 | [diff] [blame^] | 132 | Score *= 1.1f; |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 133 | break; |
| 134 | case Namespace: |
Simon Pilgrim | 0c9e1c8 | 2018-06-06 12:48:27 +0000 | [diff] [blame^] | 135 | Score *= 0.8f; |
Sam McCall | bc7cbb7 | 2018-06-06 12:38:37 +0000 | [diff] [blame] | 136 | break; |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 137 | case Macro: |
Simon Pilgrim | 0c9e1c8 | 2018-06-06 12:48:27 +0000 | [diff] [blame^] | 138 | Score *= 0.2f; |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 139 | break; |
| 140 | case Unknown: |
| 141 | break; |
| 142 | } |
| 143 | |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 144 | return Score; |
| 145 | } |
| 146 | |
| 147 | raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) { |
| 148 | OS << formatv("=== Symbol quality: {0}\n", S.evaluate()); |
| 149 | if (S.SemaCCPriority) |
| 150 | OS << formatv("\tSemaCCPriority: {0}\n", S.SemaCCPriority); |
| 151 | OS << formatv("\tReferences: {0}\n", S.References); |
| 152 | OS << formatv("\tDeprecated: {0}\n", S.Deprecated); |
Sam McCall | 4a3c69b | 2018-06-06 08:53:36 +0000 | [diff] [blame] | 153 | OS << formatv("\tCategory: {0}\n", static_cast<int>(S.Category)); |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 154 | return OS; |
| 155 | } |
| 156 | |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 157 | static SymbolRelevanceSignals::AccessibleScope |
| 158 | ComputeScope(const NamedDecl &D) { |
Sam McCall | 89f5293 | 2018-06-05 18:00:48 +0000 | [diff] [blame] | 159 | bool InClass = false; |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 160 | for (const DeclContext *DC = D.getDeclContext(); !DC->isFileContext(); |
| 161 | DC = DC->getParent()) { |
| 162 | if (DC->isFunctionOrMethod()) |
| 163 | return SymbolRelevanceSignals::FunctionScope; |
| 164 | InClass = InClass || DC->isRecord(); |
| 165 | } |
| 166 | if (InClass) |
| 167 | return SymbolRelevanceSignals::ClassScope; |
| 168 | // This threshold could be tweaked, e.g. to treat module-visible as global. |
| 169 | if (D.getLinkageInternal() < ExternalLinkage) |
| 170 | return SymbolRelevanceSignals::FileScope; |
| 171 | return SymbolRelevanceSignals::GlobalScope; |
| 172 | } |
| 173 | |
| 174 | void SymbolRelevanceSignals::merge(const Symbol &IndexResult) { |
| 175 | // FIXME: Index results always assumed to be at global scope. If Scope becomes |
| 176 | // relevant to non-completion requests, we should recognize class members etc. |
| 177 | } |
| 178 | |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 179 | void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { |
| 180 | if (SemaCCResult.Availability == CXAvailability_NotAvailable || |
| 181 | SemaCCResult.Availability == CXAvailability_NotAccessible) |
| 182 | Forbidden = true; |
Ilya Biryukov | f029646 | 2018-06-04 14:50:59 +0000 | [diff] [blame] | 183 | |
| 184 | if (SemaCCResult.Declaration) { |
| 185 | // We boost things that have decls in the main file. |
| 186 | // The real proximity scores would be more general when we have them. |
| 187 | float DeclProximity = |
| 188 | hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.0; |
| 189 | ProximityScore = std::max(DeclProximity, ProximityScore); |
| 190 | } |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 191 | |
| 192 | // Declarations are scoped, others (like macros) are assumed global. |
Sam McCall | 661d89c | 2018-06-05 17:58:12 +0000 | [diff] [blame] | 193 | if (SemaCCResult.Declaration) |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 194 | Scope = std::min(Scope, ComputeScope(*SemaCCResult.Declaration)); |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 195 | } |
| 196 | |
| 197 | float SymbolRelevanceSignals::evaluate() const { |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 198 | float Score = 1; |
| 199 | |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 200 | if (Forbidden) |
| 201 | return 0; |
Ilya Biryukov | f029646 | 2018-06-04 14:50:59 +0000 | [diff] [blame] | 202 | |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 203 | Score *= NameMatch; |
| 204 | |
Ilya Biryukov | f029646 | 2018-06-04 14:50:59 +0000 | [diff] [blame] | 205 | // Proximity scores are [0,1] and we translate them into a multiplier in the |
| 206 | // range from 1 to 2. |
| 207 | Score *= 1 + ProximityScore; |
Sam McCall | d9b54f0 | 2018-06-05 16:30:25 +0000 | [diff] [blame] | 208 | |
| 209 | // Symbols like local variables may only be referenced within their scope. |
| 210 | // Conversely if we're in that scope, it's likely we'll reference them. |
| 211 | if (Query == CodeComplete) { |
| 212 | // The narrower the scope where a symbol is visible, the more likely it is |
| 213 | // to be relevant when it is available. |
| 214 | switch (Scope) { |
| 215 | case GlobalScope: |
| 216 | break; |
| 217 | case FileScope: |
| 218 | Score *= 1.5; |
| 219 | case ClassScope: |
| 220 | Score *= 2; |
| 221 | case FunctionScope: |
| 222 | Score *= 4; |
| 223 | } |
| 224 | } |
| 225 | |
Ilya Biryukov | f029646 | 2018-06-04 14:50:59 +0000 | [diff] [blame] | 226 | return Score; |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 227 | } |
| 228 | raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) { |
| 229 | OS << formatv("=== Symbol relevance: {0}\n", S.evaluate()); |
| 230 | OS << formatv("\tName match: {0}\n", S.NameMatch); |
| 231 | OS << formatv("\tForbidden: {0}\n", S.Forbidden); |
Sam McCall | 661d89c | 2018-06-05 17:58:12 +0000 | [diff] [blame] | 232 | OS << formatv("\tProximity: {0}\n", S.ProximityScore); |
| 233 | OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query)); |
| 234 | OS << formatv("\tScope: {0}\n", static_cast<int>(S.Scope)); |
Sam McCall | c5707b6 | 2018-05-15 17:43:27 +0000 | [diff] [blame] | 235 | return OS; |
| 236 | } |
| 237 | |
/// Combines a quality score and a relevance score into one overall score by
/// multiplying them.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  const float Combined = SymbolQuality * SymbolRelevance;
  return Combined;
}
| 241 | |
| 242 | // Produces an integer that sorts in the same order as F. |
| 243 | // That is: a < b <==> encodeFloat(a) < encodeFloat(b). |
| 244 | static uint32_t encodeFloat(float F) { |
| 245 | static_assert(std::numeric_limits<float>::is_iec559, ""); |
| 246 | constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); |
| 247 | |
| 248 | // Get the bits of the float. Endianness is the same as for integers. |
| 249 | uint32_t U = FloatToBits(F); |
| 250 | // IEEE 754 floats compare like sign-magnitude integers. |
| 251 | if (U & TopBit) // Negative float. |
| 252 | return 0 - U; // Map onto the low half of integers, order reversed. |
| 253 | return U + TopBit; // Positive floats map onto the high half of integers. |
| 254 | } |
| 255 | |
| 256 | std::string sortText(float Score, llvm::StringRef Name) { |
| 257 | // We convert -Score to an integer, and hex-encode for readability. |
| 258 | // Example: [0.5, "foo"] -> "41000000foo" |
| 259 | std::string S; |
| 260 | llvm::raw_string_ostream OS(S); |
| 261 | write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, |
| 262 | /*Width=*/2 * sizeof(Score)); |
| 263 | OS << Name; |
| 264 | OS.flush(); |
| 265 | return S; |
| 266 | } |
| 267 | |
| 268 | } // namespace clangd |
| 269 | } // namespace clang |