blob: d60afa28ce3e3eaf3396fea28cef13146bbb95f9 [file] [log] [blame]
Sam McCall3f0243f2018-07-03 08:09:29 +00001//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This library measures the distance between file paths.
11// It's used for ranking symbols, e.g. in code completion.
12// |foo/bar.h -> foo/bar.h| = 0.
13// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
14// This is an edit-distance, where edits go up or down the directory tree.
15// It's not symmetrical, the costs of going up and down may not match.
16//
17// Dealing with multiple sources:
18// In practice we care about the distance from a source file, but files near
19// its main-header and #included files are considered "close".
20// So we start with a set of (anchor, cost) pairs, and call the distance to a
21// path the minimum of `cost + |source -> path|`.
22//
23// We allow each source to limit the number of up-traversals paths may start
24// with. Up-traversals may reach things that are not "semantically near".
25//
26// Symbol URI schemes:
27// Symbol locations may be represented by URIs rather than file paths directly.
28// In this case we want to perform distance computations in URI space rather
29// than in file-space, without performing redundant conversions.
30// Therefore we have a lookup structure that accepts URIs, so that intermediate
31// calculations for the same scheme can be reused.
32//
33// Caveats:
34// Assuming up and down traversals each have uniform costs is simplistic.
35// Often there are "semantic roots" whose children are almost unrelated.
36// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
37//
38//===----------------------------------------------------------------------===//
39
Kirill Bobyrev19a94612018-09-06 12:54:43 +000040#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
41#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
42
Sam McCall3f0243f2018-07-03 08:09:29 +000043#include "URI.h"
44#include "llvm/ADT/DenseMap.h"
45#include "llvm/ADT/DenseMapInfo.h"
46#include "llvm/ADT/SmallString.h"
47#include "llvm/ADT/StringRef.h"
48#include "llvm/Support/Allocator.h"
49#include "llvm/Support/Path.h"
50#include "llvm/Support/StringSaver.h"
Eric Liu3fac4ef2018-10-17 11:19:02 +000051#include <memory>
Sam McCall3f0243f2018-07-03 08:09:29 +000052
53namespace clang {
54namespace clangd {
55
56struct FileDistanceOptions {
57 unsigned UpCost = 2; // |foo/bar.h -> foo|
58 unsigned DownCost = 1; // |foo -> foo/bar.h|
59 unsigned IncludeCost = 2; // |foo.cc -> included_header.h|
Eric Liu0c29722c2018-10-16 10:41:17 +000060 bool AllowDownTraversalFromRoot = true; // | / -> /a |
Sam McCall3f0243f2018-07-03 08:09:29 +000061};
62
63struct SourceParams {
64 // Base cost for paths starting at this source.
65 unsigned Cost = 0;
66 // Limits the number of upwards traversals allowed from this source.
67 unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
68};
69
70// Supports lookups to find the minimum distance to a file from any source.
71// This object should be reused, it memoizes intermediate computations.
72class FileDistance {
73public:
Kirill Bobyrevc1bb7b92018-08-31 08:19:50 +000074 static constexpr unsigned Unreachable = std::numeric_limits<unsigned>::max();
Eric Liu0c29722c2018-10-16 10:41:17 +000075 static const llvm::hash_code RootHash;
Sam McCall3f0243f2018-07-03 08:09:29 +000076
77 FileDistance(llvm::StringMap<SourceParams> Sources,
78 const FileDistanceOptions &Opts = {});
79
80 // Computes the minimum distance from any source to the file path.
81 unsigned distance(llvm::StringRef Path);
82
83private:
84 // Costs computed so far. Always contains sources and their ancestors.
85 // We store hash codes only. Collisions are rare and consequences aren't dire.
86 llvm::DenseMap<llvm::hash_code, unsigned> Cache;
87 FileDistanceOptions Opts;
88};
89
90// Supports lookups like FileDistance, but the lookup keys are URIs.
91// We convert each of the sources to the scheme of the URI and do a FileDistance
92// comparison on the bodies.
93class URIDistance {
94public:
Kirill Bobyreve5536b92018-09-07 09:18:58 +000095 // \p Sources must contain absolute paths, not URIs.
Sam McCall3f0243f2018-07-03 08:09:29 +000096 URIDistance(llvm::StringMap<SourceParams> Sources,
97 const FileDistanceOptions &Opts = {})
98 : Sources(Sources), Opts(Opts) {}
99
100 // Computes the minimum distance from any source to the URI.
101 // Only sources that can be mapped into the URI's scheme are considered.
102 unsigned distance(llvm::StringRef URI);
103
104private:
105 // Returns the FileDistance for a URI scheme, creating it if needed.
106 FileDistance &forScheme(llvm::StringRef Scheme);
107
108 // We cache the results using the original strings so we can skip URI parsing.
109 llvm::DenseMap<llvm::hash_code, unsigned> Cache;
110 llvm::StringMap<SourceParams> Sources;
111 llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
112 FileDistanceOptions Opts;
113};
114
Eric Liu3fac4ef2018-10-17 11:19:02 +0000115/// Support lookups like FileDistance, but the lookup keys are symbol scopes.
116/// For example, a scope "na::nb::" is converted to "/na/nb".
117class ScopeDistance {
118public:
119 /// QueryScopes[0] is the preferred scope.
120 ScopeDistance(llvm::ArrayRef<std::string> QueryScopes);
121
122 unsigned distance(llvm::StringRef SymbolScope);
123
124private:
125 FileDistance Distance;
126};
127
Sam McCall3f0243f2018-07-03 08:09:29 +0000128} // namespace clangd
129} // namespace clang
Kirill Bobyrev19a94612018-09-06 12:54:43 +0000130
131#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H