blob: 145814684737ffd8c9be12516a50f08f1f2f3eac [file] [log] [blame]
Sam McCall3f0243f2018-07-03 08:09:29 +00001//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// The FileDistance structure allows calculating the minimum distance to paths
11// in a single tree.
12// We simply walk up the path's ancestors until we find a node whose cost is
13// known, and add the cost of walking back down. Initialization ensures this
14// gives the correct path to the roots.
15// We cache the results, so that the runtime is O(|A|), where A is the set of
16// all distinct ancestors of visited paths.
17//
18// Example after initialization with /=2, /bar=0, DownCost = 1:
19// / = 2
20// /bar = 0
21//
22// After querying /foo/bar and /bar/foo:
23// / = 2
24// /bar = 0
25// /bar/foo = 1
26// /foo = 3
27// /foo/bar = 4
28//
29// URIDistance creates FileDistance lazily for each URI scheme encountered. In
30// practice this is a small constant factor.
31//
32//===-------------------------------------------------------------------------//
33
34#include "FileDistance.h"
35#include "Logger.h"
36#include "llvm/ADT/STLExtras.h"
37#include <queue>
Sam McCall3f0243f2018-07-03 08:09:29 +000038
Sam McCallc008af62018-10-20 15:30:37 +000039using namespace llvm;
Sam McCall3f0243f2018-07-03 08:09:29 +000040namespace clang {
41namespace clangd {
Sam McCall3f0243f2018-07-03 08:09:29 +000042
43// Convert a path into the canonical form.
44// Canonical form is either "/", or "/segment" * N:
45// C:\foo\bar --> /c:/foo/bar
46// /foo/ --> /foo
47// a/b/c --> /a/b/c
48static SmallString<128> canonicalize(StringRef Path) {
49 SmallString<128> Result = Path.rtrim('/');
50 native(Result, sys::path::Style::posix);
51 if (Result.empty() || Result.front() != '/')
52 Result.insert(Result.begin(), '/');
53 return Result;
54}
55
Kirill Bobyrevc38b3f02018-08-31 08:29:48 +000056constexpr const unsigned FileDistance::Unreachable;
Sam McCallc008af62018-10-20 15:30:37 +000057const hash_code FileDistance::RootHash = hash_value(StringRef("/"));
Sam McCall3f0243f2018-07-03 08:09:29 +000058
59FileDistance::FileDistance(StringMap<SourceParams> Sources,
60 const FileDistanceOptions &Opts)
61 : Opts(Opts) {
Sam McCallc008af62018-10-20 15:30:37 +000062 DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges;
Sam McCall3f0243f2018-07-03 08:09:29 +000063 // Compute the best distance following only up edges.
64 // Keep track of down edges, in case we can use them to improve on this.
65 for (const auto &S : Sources) {
66 auto Canonical = canonicalize(S.getKey());
Sam McCallbed58852018-07-11 10:35:11 +000067 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
68 S.second.MaxUpTraversals);
Sam McCall3f0243f2018-07-03 08:09:29 +000069 // Walk up to ancestors of this source, assigning cost.
70 StringRef Rest = Canonical;
Sam McCallc008af62018-10-20 15:30:37 +000071 hash_code Hash = hash_value(Rest);
Sam McCall3f0243f2018-07-03 08:09:29 +000072 for (unsigned I = 0; !Rest.empty(); ++I) {
73 Rest = parent_path(Rest, sys::path::Style::posix);
74 auto NextHash = hash_value(Rest);
Sam McCall7c96bb62018-07-04 08:27:28 +000075 auto &Down = DownEdges[NextHash];
Sam McCallc008af62018-10-20 15:30:37 +000076 if (!is_contained(Down, Hash))
Fangrui Song8380c9e2018-10-07 17:21:08 +000077 Down.push_back(Hash);
Sam McCall3f0243f2018-07-03 08:09:29 +000078 // We can't just break after MaxUpTraversals, must still set DownEdges.
79 if (I > S.getValue().MaxUpTraversals) {
80 if (Cache.find(Hash) != Cache.end())
81 break;
82 } else {
83 unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
84 auto R = Cache.try_emplace(Hash, Cost);
85 if (!R.second) {
86 if (Cost < R.first->second) {
87 R.first->second = Cost;
88 } else {
89 // If we're not the best way to get to this path, stop assigning.
90 break;
91 }
92 }
93 }
94 Hash = NextHash;
95 }
96 }
97 // Now propagate scores parent -> child if that's an improvement.
98 // BFS ensures we propagate down chains (must visit parents before children).
99 std::queue<hash_code> Next;
Sam McCallc008af62018-10-20 15:30:37 +0000100 for (auto Child : DownEdges.lookup(hash_value(StringRef(""))))
Sam McCall3f0243f2018-07-03 08:09:29 +0000101 Next.push(Child);
102 while (!Next.empty()) {
Eric Liu0c29722c2018-10-16 10:41:17 +0000103 auto Parent = Next.front();
104 Next.pop();
105 auto ParentCost = Cache.lookup(Parent);
106 for (auto Child : DownEdges.lookup(Parent)) {
107 if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) {
108 auto &ChildCost =
109 Cache.try_emplace(Child, Unreachable).first->getSecond();
110 if (ParentCost + Opts.DownCost < ChildCost)
111 ChildCost = ParentCost + Opts.DownCost;
112 }
Sam McCall3f0243f2018-07-03 08:09:29 +0000113 Next.push(Child);
114 }
Sam McCall3f0243f2018-07-03 08:09:29 +0000115 }
116}
117
118unsigned FileDistance::distance(StringRef Path) {
119 auto Canonical = canonicalize(Path);
Kirill Bobyrevc1bb7b92018-08-31 08:19:50 +0000120 unsigned Cost = Unreachable;
Sam McCall3f0243f2018-07-03 08:09:29 +0000121 SmallVector<hash_code, 16> Ancestors;
122 // Walk up ancestors until we find a path we know the distance for.
123 for (StringRef Rest = Canonical; !Rest.empty();
124 Rest = parent_path(Rest, sys::path::Style::posix)) {
125 auto Hash = hash_value(Rest);
Eric Liu0c29722c2018-10-16 10:41:17 +0000126 if (Hash == RootHash && !Ancestors.empty() &&
127 !Opts.AllowDownTraversalFromRoot) {
128 Cost = Unreachable;
129 break;
130 }
Sam McCall3f0243f2018-07-03 08:09:29 +0000131 auto It = Cache.find(Hash);
132 if (It != Cache.end()) {
133 Cost = It->second;
134 break;
135 }
136 Ancestors.push_back(Hash);
137 }
138 // Now we know the costs for (known node, queried node].
139 // Fill these in, walking down the directory tree.
140 for (hash_code Hash : reverse(Ancestors)) {
Kirill Bobyrevc1bb7b92018-08-31 08:19:50 +0000141 if (Cost != Unreachable)
Sam McCall3f0243f2018-07-03 08:09:29 +0000142 Cost += Opts.DownCost;
143 Cache.try_emplace(Hash, Cost);
144 }
Sam McCallbed58852018-07-11 10:35:11 +0000145 dlog("distance({0} = {1})", Path, Cost);
Sam McCall3f0243f2018-07-03 08:09:29 +0000146 return Cost;
147}
148
Sam McCallc008af62018-10-20 15:30:37 +0000149unsigned URIDistance::distance(StringRef URI) {
150 auto R = Cache.try_emplace(hash_value(URI), FileDistance::Unreachable);
Sam McCall3f0243f2018-07-03 08:09:29 +0000151 if (!R.second)
152 return R.first->getSecond();
153 if (auto U = clangd::URI::parse(URI)) {
Sam McCallbed58852018-07-11 10:35:11 +0000154 dlog("distance({0} = {1})", URI, U->body());
Sam McCall3f0243f2018-07-03 08:09:29 +0000155 R.first->second = forScheme(U->scheme()).distance(U->body());
156 } else {
Sam McCallbed58852018-07-11 10:35:11 +0000157 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
Sam McCall3f0243f2018-07-03 08:09:29 +0000158 }
159 return R.first->second;
160}
161
Sam McCallc008af62018-10-20 15:30:37 +0000162FileDistance &URIDistance::forScheme(StringRef Scheme) {
Sam McCall3f0243f2018-07-03 08:09:29 +0000163 auto &Delegate = ByScheme[Scheme];
164 if (!Delegate) {
Sam McCallc008af62018-10-20 15:30:37 +0000165 StringMap<SourceParams> SchemeSources;
Sam McCall3f0243f2018-07-03 08:09:29 +0000166 for (const auto &Source : Sources) {
167 if (auto U = clangd::URI::create(Source.getKey(), Scheme))
168 SchemeSources.try_emplace(U->body(), Source.getValue());
169 else
170 consumeError(U.takeError());
171 }
Sam McCallbed58852018-07-11 10:35:11 +0000172 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
173 SchemeSources.size(), Sources.size());
Sam McCall3f0243f2018-07-03 08:09:29 +0000174 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
175 }
176 return *Delegate;
177}
178
Sam McCallc008af62018-10-20 15:30:37 +0000179static std::pair<std::string, int> scopeToPath(StringRef Scope) {
Eric Liu3fac4ef2018-10-17 11:19:02 +0000180 SmallVector<StringRef, 4> Split;
181 Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
Sam McCallc008af62018-10-20 15:30:37 +0000182 return {"/" + join(Split, "/"), Split.size()};
Eric Liu3fac4ef2018-10-17 11:19:02 +0000183}
184
Sam McCallc008af62018-10-20 15:30:37 +0000185static FileDistance createScopeFileDistance(ArrayRef<std::string> QueryScopes) {
Eric Liu3fac4ef2018-10-17 11:19:02 +0000186 FileDistanceOptions Opts;
187 Opts.UpCost = 2;
188 Opts.DownCost = 4;
189 Opts.AllowDownTraversalFromRoot = false;
190
191 StringMap<SourceParams> Sources;
192 StringRef Preferred = QueryScopes.empty() ? "" : QueryScopes.front().c_str();
193 for (StringRef S : QueryScopes) {
194 SourceParams Param;
195 // Penalize the global scope even it's preferred, as all projects can define
196 // symbols in it, and there is pattern where using-namespace is used in
197 // place of enclosing namespaces (e.g. in implementation files).
198 if (S == Preferred)
199 Param.Cost = S == "" ? 2 : 0;
200 else if (Preferred.startswith(S) && !S.empty())
201 continue; // just rely on up-traversals.
202 else
203 Param.Cost = S == "" ? 5 : 2;
204 auto Path = scopeToPath(S);
205 // The global namespace is not 'near' its children.
206 Param.MaxUpTraversals = std::max(Path.second - 1, 0);
207 Sources[Path.first] = std::move(Param);
208 }
209 return FileDistance(Sources, Opts);
210}
211
Sam McCallc008af62018-10-20 15:30:37 +0000212ScopeDistance::ScopeDistance(ArrayRef<std::string> QueryScopes)
Eric Liu3fac4ef2018-10-17 11:19:02 +0000213 : Distance(createScopeFileDistance(QueryScopes)) {}
214
Sam McCallc008af62018-10-20 15:30:37 +0000215unsigned ScopeDistance::distance(StringRef SymbolScope) {
Eric Liu3fac4ef2018-10-17 11:19:02 +0000216 return Distance.distance(scopeToPath(SymbolScope).first);
217}
218
Sam McCall3f0243f2018-07-03 08:09:29 +0000219} // namespace clangd
220} // namespace clang