blob: a0ce25b48faaa5ba7eb2efe01511278dca5e757e [file] [log] [blame]
Sam McCall3f0243f2018-07-03 08:09:29 +00001//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// The FileDistance structure allows calculating the minimum distance to paths
11// in a single tree.
12// We simply walk up the path's ancestors until we find a node whose cost is
13// known, and add the cost of walking back down. Initialization ensures this
14// gives the correct path to the roots.
15// We cache the results, so that the runtime is O(|A|), where A is the set of
16// all distinct ancestors of visited paths.
17//
18// Example after initialization with /=2, /bar=0, DownCost = 1:
19// / = 2
20// /bar = 0
21//
22// After querying /foo/bar and /bar/foo:
23// / = 2
24// /bar = 0
25// /bar/foo = 1
26// /foo = 3
27// /foo/bar = 4
28//
29// URIDistance creates FileDistance lazily for each URI scheme encountered. In
30// practice this is a small constant factor.
31//
32//===-------------------------------------------------------------------------//
33
34#include "FileDistance.h"
35#include "Logger.h"
36#include "llvm/ADT/STLExtras.h"
37#include <queue>
Sam McCall3f0243f2018-07-03 08:09:29 +000038
39namespace clang {
40namespace clangd {
41using namespace llvm;
42
43// Convert a path into the canonical form.
44// Canonical form is either "/", or "/segment" * N:
45// C:\foo\bar --> /c:/foo/bar
46// /foo/ --> /foo
47// a/b/c --> /a/b/c
48static SmallString<128> canonicalize(StringRef Path) {
49 SmallString<128> Result = Path.rtrim('/');
50 native(Result, sys::path::Style::posix);
51 if (Result.empty() || Result.front() != '/')
52 Result.insert(Result.begin(), '/');
53 return Result;
54}
55
Sam McCall12bee932018-07-04 08:52:13 +000056constexpr const unsigned FileDistance::kUnreachable;
Sam McCall3f0243f2018-07-03 08:09:29 +000057
58FileDistance::FileDistance(StringMap<SourceParams> Sources,
59 const FileDistanceOptions &Opts)
60 : Opts(Opts) {
61 llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges;
62 // Compute the best distance following only up edges.
63 // Keep track of down edges, in case we can use them to improve on this.
64 for (const auto &S : Sources) {
65 auto Canonical = canonicalize(S.getKey());
Sam McCallbed58852018-07-11 10:35:11 +000066 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
67 S.second.MaxUpTraversals);
Sam McCall3f0243f2018-07-03 08:09:29 +000068 // Walk up to ancestors of this source, assigning cost.
69 StringRef Rest = Canonical;
70 llvm::hash_code Hash = hash_value(Rest);
71 for (unsigned I = 0; !Rest.empty(); ++I) {
72 Rest = parent_path(Rest, sys::path::Style::posix);
73 auto NextHash = hash_value(Rest);
Sam McCall7c96bb62018-07-04 08:27:28 +000074 auto &Down = DownEdges[NextHash];
75 if (std::find(Down.begin(), Down.end(), Hash) == Down.end())
76 DownEdges[NextHash].push_back(Hash);
Sam McCall3f0243f2018-07-03 08:09:29 +000077 // We can't just break after MaxUpTraversals, must still set DownEdges.
78 if (I > S.getValue().MaxUpTraversals) {
79 if (Cache.find(Hash) != Cache.end())
80 break;
81 } else {
82 unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
83 auto R = Cache.try_emplace(Hash, Cost);
84 if (!R.second) {
85 if (Cost < R.first->second) {
86 R.first->second = Cost;
87 } else {
88 // If we're not the best way to get to this path, stop assigning.
89 break;
90 }
91 }
92 }
93 Hash = NextHash;
94 }
95 }
96 // Now propagate scores parent -> child if that's an improvement.
97 // BFS ensures we propagate down chains (must visit parents before children).
98 std::queue<hash_code> Next;
99 for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef(""))))
100 Next.push(Child);
101 while (!Next.empty()) {
102 auto ParentCost = Cache.lookup(Next.front());
103 for (auto Child : DownEdges.lookup(Next.front())) {
104 auto &ChildCost =
105 Cache.try_emplace(Child, kUnreachable).first->getSecond();
106 if (ParentCost + Opts.DownCost < ChildCost)
107 ChildCost = ParentCost + Opts.DownCost;
108 Next.push(Child);
109 }
110 Next.pop();
111 }
112}
113
114unsigned FileDistance::distance(StringRef Path) {
115 auto Canonical = canonicalize(Path);
116 unsigned Cost = kUnreachable;
117 SmallVector<hash_code, 16> Ancestors;
118 // Walk up ancestors until we find a path we know the distance for.
119 for (StringRef Rest = Canonical; !Rest.empty();
120 Rest = parent_path(Rest, sys::path::Style::posix)) {
121 auto Hash = hash_value(Rest);
122 auto It = Cache.find(Hash);
123 if (It != Cache.end()) {
124 Cost = It->second;
125 break;
126 }
127 Ancestors.push_back(Hash);
128 }
129 // Now we know the costs for (known node, queried node].
130 // Fill these in, walking down the directory tree.
131 for (hash_code Hash : reverse(Ancestors)) {
132 if (Cost != kUnreachable)
133 Cost += Opts.DownCost;
134 Cache.try_emplace(Hash, Cost);
135 }
Sam McCallbed58852018-07-11 10:35:11 +0000136 dlog("distance({0} = {1})", Path, Cost);
Sam McCall3f0243f2018-07-03 08:09:29 +0000137 return Cost;
138}
139
140unsigned URIDistance::distance(llvm::StringRef URI) {
141 auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable);
142 if (!R.second)
143 return R.first->getSecond();
144 if (auto U = clangd::URI::parse(URI)) {
Sam McCallbed58852018-07-11 10:35:11 +0000145 dlog("distance({0} = {1})", URI, U->body());
Sam McCall3f0243f2018-07-03 08:09:29 +0000146 R.first->second = forScheme(U->scheme()).distance(U->body());
147 } else {
Sam McCallbed58852018-07-11 10:35:11 +0000148 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
Sam McCall3f0243f2018-07-03 08:09:29 +0000149 }
150 return R.first->second;
151}
152
153FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
154 auto &Delegate = ByScheme[Scheme];
155 if (!Delegate) {
156 llvm::StringMap<SourceParams> SchemeSources;
157 for (const auto &Source : Sources) {
158 if (auto U = clangd::URI::create(Source.getKey(), Scheme))
159 SchemeSources.try_emplace(U->body(), Source.getValue());
160 else
161 consumeError(U.takeError());
162 }
Sam McCallbed58852018-07-11 10:35:11 +0000163 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
164 SchemeSources.size(), Sources.size());
Sam McCall3f0243f2018-07-03 08:09:29 +0000165 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
166 }
167 return *Delegate;
168}
169
170} // namespace clangd
171} // namespace clang