Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 1 | //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // The FileDistance structure allows calculating the minimum distance to paths |
| 10 | // in a single tree. |
| 11 | // We simply walk up the path's ancestors until we find a node whose cost is |
| 12 | // known, and add the cost of walking back down. Initialization ensures this |
| 13 | // gives the correct path to the roots. |
| 14 | // We cache the results, so that the runtime is O(|A|), where A is the set of |
| 15 | // all distinct ancestors of visited paths. |
| 16 | // |
| 17 | // Example after initialization with /=2, /bar=0, DownCost = 1: |
| 18 | // / = 2 |
| 19 | // /bar = 0 |
| 20 | // |
| 21 | // After querying /foo/bar and /bar/foo: |
| 22 | // / = 2 |
| 23 | // /bar = 0 |
| 24 | // /bar/foo = 1 |
| 25 | // /foo = 3 |
| 26 | // /foo/bar = 4 |
| 27 | // |
| 28 | // URIDistance creates FileDistance lazily for each URI scheme encountered. In |
| 29 | // practice this is a small constant factor. |
| 30 | // |
| 31 | //===-------------------------------------------------------------------------// |
| 32 | |
| 33 | #include "FileDistance.h" |
| 34 | #include "Logger.h" |
| 35 | #include "llvm/ADT/STLExtras.h" |
| 36 | #include <queue> |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 37 | |
| 38 | namespace clang { |
| 39 | namespace clangd { |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 40 | |
| 41 | // Convert a path into the canonical form. |
| 42 | // Canonical form is either "/", or "/segment" * N: |
| 43 | // C:\foo\bar --> /c:/foo/bar |
| 44 | // /foo/ --> /foo |
| 45 | // a/b/c --> /a/b/c |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 46 | static llvm::SmallString<128> canonicalize(llvm::StringRef Path) { |
| 47 | llvm::SmallString<128> Result = Path.rtrim('/'); |
| 48 | native(Result, llvm::sys::path::Style::posix); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 49 | if (Result.empty() || Result.front() != '/') |
| 50 | Result.insert(Result.begin(), '/'); |
| 51 | return Result; |
| 52 | } |
| 53 | |
Kirill Bobyrev | c38b3f0 | 2018-08-31 08:29:48 +0000 | [diff] [blame] | 54 | constexpr const unsigned FileDistance::Unreachable; |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 55 | const llvm::hash_code FileDistance::RootHash = |
| 56 | llvm::hash_value(llvm::StringRef("/")); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 57 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 58 | FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources, |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 59 | const FileDistanceOptions &Opts) |
| 60 | : Opts(Opts) { |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 61 | llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code, 4>> |
| 62 | DownEdges; |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 63 | // Compute the best distance following only up edges. |
| 64 | // Keep track of down edges, in case we can use them to improve on this. |
| 65 | for (const auto &S : Sources) { |
| 66 | auto Canonical = canonicalize(S.getKey()); |
Sam McCall | bed5885 | 2018-07-11 10:35:11 +0000 | [diff] [blame] | 67 | dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, |
| 68 | S.second.MaxUpTraversals); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 69 | // Walk up to ancestors of this source, assigning cost. |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 70 | llvm::StringRef Rest = Canonical; |
| 71 | llvm::hash_code Hash = llvm::hash_value(Rest); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 72 | for (unsigned I = 0; !Rest.empty(); ++I) { |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 73 | Rest = parent_path(Rest, llvm::sys::path::Style::posix); |
| 74 | auto NextHash = llvm::hash_value(Rest); |
Sam McCall | 7c96bb6 | 2018-07-04 08:27:28 +0000 | [diff] [blame] | 75 | auto &Down = DownEdges[NextHash]; |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 76 | if (!llvm::is_contained(Down, Hash)) |
Fangrui Song | 8380c9e | 2018-10-07 17:21:08 +0000 | [diff] [blame] | 77 | Down.push_back(Hash); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 78 | // We can't just break after MaxUpTraversals, must still set DownEdges. |
| 79 | if (I > S.getValue().MaxUpTraversals) { |
| 80 | if (Cache.find(Hash) != Cache.end()) |
| 81 | break; |
| 82 | } else { |
| 83 | unsigned Cost = S.getValue().Cost + I * Opts.UpCost; |
| 84 | auto R = Cache.try_emplace(Hash, Cost); |
| 85 | if (!R.second) { |
| 86 | if (Cost < R.first->second) { |
| 87 | R.first->second = Cost; |
| 88 | } else { |
| 89 | // If we're not the best way to get to this path, stop assigning. |
| 90 | break; |
| 91 | } |
| 92 | } |
| 93 | } |
| 94 | Hash = NextHash; |
| 95 | } |
| 96 | } |
| 97 | // Now propagate scores parent -> child if that's an improvement. |
| 98 | // BFS ensures we propagate down chains (must visit parents before children). |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 99 | std::queue<llvm::hash_code> Next; |
| 100 | for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef("")))) |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 101 | Next.push(Child); |
| 102 | while (!Next.empty()) { |
Eric Liu | 0c29722c | 2018-10-16 10:41:17 +0000 | [diff] [blame] | 103 | auto Parent = Next.front(); |
| 104 | Next.pop(); |
| 105 | auto ParentCost = Cache.lookup(Parent); |
| 106 | for (auto Child : DownEdges.lookup(Parent)) { |
| 107 | if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) { |
| 108 | auto &ChildCost = |
| 109 | Cache.try_emplace(Child, Unreachable).first->getSecond(); |
| 110 | if (ParentCost + Opts.DownCost < ChildCost) |
| 111 | ChildCost = ParentCost + Opts.DownCost; |
| 112 | } |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 113 | Next.push(Child); |
| 114 | } |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 115 | } |
| 116 | } |
| 117 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 118 | unsigned FileDistance::distance(llvm::StringRef Path) { |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 119 | auto Canonical = canonicalize(Path); |
Kirill Bobyrev | c1bb7b9 | 2018-08-31 08:19:50 +0000 | [diff] [blame] | 120 | unsigned Cost = Unreachable; |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 121 | llvm::SmallVector<llvm::hash_code, 16> Ancestors; |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 122 | // Walk up ancestors until we find a path we know the distance for. |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 123 | for (llvm::StringRef Rest = Canonical; !Rest.empty(); |
| 124 | Rest = parent_path(Rest, llvm::sys::path::Style::posix)) { |
| 125 | auto Hash = llvm::hash_value(Rest); |
Eric Liu | 0c29722c | 2018-10-16 10:41:17 +0000 | [diff] [blame] | 126 | if (Hash == RootHash && !Ancestors.empty() && |
| 127 | !Opts.AllowDownTraversalFromRoot) { |
| 128 | Cost = Unreachable; |
| 129 | break; |
| 130 | } |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 131 | auto It = Cache.find(Hash); |
| 132 | if (It != Cache.end()) { |
| 133 | Cost = It->second; |
| 134 | break; |
| 135 | } |
| 136 | Ancestors.push_back(Hash); |
| 137 | } |
| 138 | // Now we know the costs for (known node, queried node]. |
| 139 | // Fill these in, walking down the directory tree. |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 140 | for (llvm::hash_code Hash : llvm::reverse(Ancestors)) { |
Kirill Bobyrev | c1bb7b9 | 2018-08-31 08:19:50 +0000 | [diff] [blame] | 141 | if (Cost != Unreachable) |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 142 | Cost += Opts.DownCost; |
| 143 | Cache.try_emplace(Hash, Cost); |
| 144 | } |
Sam McCall | bed5885 | 2018-07-11 10:35:11 +0000 | [diff] [blame] | 145 | dlog("distance({0} = {1})", Path, Cost); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 146 | return Cost; |
| 147 | } |
| 148 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 149 | unsigned URIDistance::distance(llvm::StringRef URI) { |
| 150 | auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 151 | if (!R.second) |
| 152 | return R.first->getSecond(); |
| 153 | if (auto U = clangd::URI::parse(URI)) { |
Sam McCall | bed5885 | 2018-07-11 10:35:11 +0000 | [diff] [blame] | 154 | dlog("distance({0} = {1})", URI, U->body()); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 155 | R.first->second = forScheme(U->scheme()).distance(U->body()); |
| 156 | } else { |
Sam McCall | bed5885 | 2018-07-11 10:35:11 +0000 | [diff] [blame] | 157 | log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError()); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 158 | } |
| 159 | return R.first->second; |
| 160 | } |
| 161 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 162 | FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 163 | auto &Delegate = ByScheme[Scheme]; |
| 164 | if (!Delegate) { |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 165 | llvm::StringMap<SourceParams> SchemeSources; |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 166 | for (const auto &Source : Sources) { |
| 167 | if (auto U = clangd::URI::create(Source.getKey(), Scheme)) |
| 168 | SchemeSources.try_emplace(U->body(), Source.getValue()); |
| 169 | else |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 170 | llvm::consumeError(U.takeError()); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 171 | } |
Sam McCall | bed5885 | 2018-07-11 10:35:11 +0000 | [diff] [blame] | 172 | dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme, |
| 173 | SchemeSources.size(), Sources.size()); |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 174 | Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); |
| 175 | } |
| 176 | return *Delegate; |
| 177 | } |
| 178 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 179 | static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) { |
| 180 | llvm::SmallVector<llvm::StringRef, 4> Split; |
Eric Liu | 3fac4ef | 2018-10-17 11:19:02 +0000 | [diff] [blame] | 181 | Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 182 | return {"/" + llvm::join(Split, "/"), Split.size()}; |
Eric Liu | 3fac4ef | 2018-10-17 11:19:02 +0000 | [diff] [blame] | 183 | } |
| 184 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 185 | static FileDistance |
| 186 | createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) { |
Eric Liu | 3fac4ef | 2018-10-17 11:19:02 +0000 | [diff] [blame] | 187 | FileDistanceOptions Opts; |
| 188 | Opts.UpCost = 2; |
| 189 | Opts.DownCost = 4; |
| 190 | Opts.AllowDownTraversalFromRoot = false; |
| 191 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 192 | llvm::StringMap<SourceParams> Sources; |
| 193 | llvm::StringRef Preferred = |
| 194 | QueryScopes.empty() ? "" : QueryScopes.front().c_str(); |
| 195 | for (llvm::StringRef S : QueryScopes) { |
Eric Liu | 3fac4ef | 2018-10-17 11:19:02 +0000 | [diff] [blame] | 196 | SourceParams Param; |
| 197 | // Penalize the global scope even it's preferred, as all projects can define |
| 198 | // symbols in it, and there is pattern where using-namespace is used in |
| 199 | // place of enclosing namespaces (e.g. in implementation files). |
| 200 | if (S == Preferred) |
Eric Liu | 4fac50e | 2018-11-26 12:12:01 +0000 | [diff] [blame] | 201 | Param.Cost = S == "" ? 4 : 0; |
Eric Liu | 3fac4ef | 2018-10-17 11:19:02 +0000 | [diff] [blame] | 202 | else if (Preferred.startswith(S) && !S.empty()) |
| 203 | continue; // just rely on up-traversals. |
| 204 | else |
Eric Liu | 4fac50e | 2018-11-26 12:12:01 +0000 | [diff] [blame] | 205 | Param.Cost = S == "" ? 6 : 2; |
Eric Liu | 3fac4ef | 2018-10-17 11:19:02 +0000 | [diff] [blame] | 206 | auto Path = scopeToPath(S); |
| 207 | // The global namespace is not 'near' its children. |
| 208 | Param.MaxUpTraversals = std::max(Path.second - 1, 0); |
| 209 | Sources[Path.first] = std::move(Param); |
| 210 | } |
| 211 | return FileDistance(Sources, Opts); |
| 212 | } |
| 213 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 214 | ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes) |
Eric Liu | 3fac4ef | 2018-10-17 11:19:02 +0000 | [diff] [blame] | 215 | : Distance(createScopeFileDistance(QueryScopes)) {} |
| 216 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 217 | unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) { |
Eric Liu | 3fac4ef | 2018-10-17 11:19:02 +0000 | [diff] [blame] | 218 | return Distance.distance(scopeToPath(SymbolScope).first); |
| 219 | } |
| 220 | |
Sam McCall | 3f0243f | 2018-07-03 08:09:29 +0000 | [diff] [blame] | 221 | } // namespace clangd |
| 222 | } // namespace clang |