Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 1 | //===- FileMatchTrie.cpp --------------------------------------------------===// |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains the implementation of a FileMatchTrie. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 13 | #include "clang/Tooling/FileMatchTrie.h" |
| 14 | #include "llvm/ADT/StringMap.h" |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 15 | #include "llvm/ADT/StringRef.h" |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 16 | #include "llvm/Support/FileSystem.h" |
Rafael Espindola | 552c169 | 2013-06-11 22:15:02 +0000 | [diff] [blame] | 17 | #include "llvm/Support/Path.h" |
Daniel Jasper | 8c05902 | 2012-10-08 18:37:21 +0000 | [diff] [blame] | 18 | #include "llvm/Support/raw_ostream.h" |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 19 | #include <string> |
| 20 | #include <vector> |
| 21 | |
Benjamin Kramer | 6a1457e | 2015-03-09 15:03:26 +0000 | [diff] [blame] | 22 | using namespace clang; |
| 23 | using namespace tooling; |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 24 | |
Benjamin Kramer | 6a1457e | 2015-03-09 15:03:26 +0000 | [diff] [blame] | 25 | namespace { |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 26 | |
Adrian Prantl | 9fc8faf | 2018-05-09 01:00:01 +0000 | [diff] [blame] | 27 | /// Default \c PathComparator using \c llvm::sys::fs::equivalent(). |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 28 | struct DefaultPathComparator : public PathComparator { |
Craig Topper | fb6b25b | 2014-03-15 04:29:04 +0000 | [diff] [blame] | 29 | bool equivalent(StringRef FileA, StringRef FileB) const override { |
Daniel Jasper | fddb32c | 2012-10-08 18:31:54 +0000 | [diff] [blame] | 30 | return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB); |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 31 | } |
| 32 | }; |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 33 | |
| 34 | } // namespace |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 35 | |
Benjamin Kramer | 6a1457e | 2015-03-09 15:03:26 +0000 | [diff] [blame] | 36 | namespace clang { |
| 37 | namespace tooling { |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 38 | |
Adrian Prantl | 9fc8faf | 2018-05-09 01:00:01 +0000 | [diff] [blame] | 39 | /// A node of the \c FileMatchTrie. |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 40 | /// |
| 41 | /// Each node has storage for up to one path and a map mapping a path segment to |
| 42 | /// child nodes. The trie starts with an empty root node. |
| 43 | class FileMatchTrieNode { |
| 44 | public: |
Adrian Prantl | 9fc8faf | 2018-05-09 01:00:01 +0000 | [diff] [blame] | 45 | /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 46 | /// the number of \c NewPath's trailing characters already consumed during |
| 47 | /// recursion. |
| 48 | /// |
| 49 | /// An insert of a path |
| 50 | /// 'p'starts at the root node and does the following: |
| 51 | /// - If the node is empty, insert 'p' into its storage and abort. |
| 52 | /// - If the node has a path 'p2' but no children, take the last path segment |
| 53 | /// 's' of 'p2', put a new child into the map at 's' an insert the rest of |
| 54 | /// 'p2' there. |
| 55 | /// - Insert a new child for the last segment of 'p' and insert the rest of |
| 56 | /// 'p' there. |
| 57 | /// |
| 58 | /// An insert operation is linear in the number of a path's segments. |
| 59 | void insert(StringRef NewPath, unsigned ConsumedLength = 0) { |
| 60 | // We cannot put relative paths into the FileMatchTrie as then a path can be |
| 61 | // a postfix of another path, violating a core assumption of the trie. |
| 62 | if (llvm::sys::path::is_relative(NewPath)) |
| 63 | return; |
| 64 | if (Path.empty()) { |
| 65 | // This is an empty leaf. Store NewPath and return. |
Benjamin Kramer | adcd026 | 2020-01-28 20:23:46 +0100 | [diff] [blame] | 66 | Path = std::string(NewPath); |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 67 | return; |
| 68 | } |
| 69 | if (Children.empty()) { |
| 70 | // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. |
| 71 | if (NewPath == Path) |
| 72 | return; |
| 73 | // Make this a node and create a child-leaf with 'Path'. |
| 74 | StringRef Element(llvm::sys::path::filename( |
| 75 | StringRef(Path).drop_back(ConsumedLength))); |
| 76 | Children[Element].Path = Path; |
| 77 | } |
| 78 | StringRef Element(llvm::sys::path::filename( |
| 79 | StringRef(NewPath).drop_back(ConsumedLength))); |
| 80 | Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); |
| 81 | } |
| 82 | |
Adrian Prantl | 9fc8faf | 2018-05-09 01:00:01 +0000 | [diff] [blame] | 83 | /// Tries to find the node under this \c FileMatchTrieNode that best |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 84 | /// matches 'FileName'. |
| 85 | /// |
| 86 | /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to |
| 87 | /// \c true and an empty string is returned. If no path fits 'FileName', an |
| 88 | /// empty string is returned. \c ConsumedLength denotes the number of |
| 89 | /// \c Filename's trailing characters already consumed during recursion. |
| 90 | /// |
| 91 | /// To find the best matching node for a given path 'p', the |
| 92 | /// \c findEquivalent() function is called recursively for each path segment |
Alexander Kornienko | 2a8c18d | 2018-04-06 15:14:32 +0000 | [diff] [blame] | 93 | /// (back to front) of 'p' until a node 'n' is reached that does not .. |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 94 | /// - .. have children. In this case it is checked |
| 95 | /// whether the stored path is equivalent to 'p'. If yes, the best match is |
| 96 | /// found. Otherwise continue with the parent node as if this node did not |
| 97 | /// exist. |
| 98 | /// - .. a child matching the next path segment. In this case, all children of |
| 99 | /// 'n' are an equally good match for 'p'. All children are of 'n' are found |
| 100 | /// recursively and their equivalence to 'p' is determined. If none are |
| 101 | /// equivalent, continue with the parent node as if 'n' didn't exist. If one |
| 102 | /// is equivalent, the best match is found. Otherwise, report and ambigiuity |
| 103 | /// error. |
| 104 | StringRef findEquivalent(const PathComparator& Comparator, |
| 105 | StringRef FileName, |
| 106 | bool &IsAmbiguous, |
| 107 | unsigned ConsumedLength = 0) const { |
| 108 | if (Children.empty()) { |
| 109 | if (Comparator.equivalent(StringRef(Path), FileName)) |
| 110 | return StringRef(Path); |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 111 | return {}; |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 112 | } |
| 113 | StringRef Element(llvm::sys::path::filename(FileName.drop_back( |
| 114 | ConsumedLength))); |
| 115 | llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild = |
| 116 | Children.find(Element); |
| 117 | if (MatchingChild != Children.end()) { |
| 118 | StringRef Result = MatchingChild->getValue().findEquivalent( |
| 119 | Comparator, FileName, IsAmbiguous, |
| 120 | ConsumedLength + Element.size() + 1); |
| 121 | if (!Result.empty() || IsAmbiguous) |
| 122 | return Result; |
| 123 | } |
| 124 | std::vector<StringRef> AllChildren; |
| 125 | getAll(AllChildren, MatchingChild); |
| 126 | StringRef Result; |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 127 | for (const auto &Child : AllChildren) { |
| 128 | if (Comparator.equivalent(Child, FileName)) { |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 129 | if (Result.empty()) { |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 130 | Result = Child; |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 131 | } else { |
| 132 | IsAmbiguous = true; |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 133 | return {}; |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 134 | } |
| 135 | } |
| 136 | } |
| 137 | return Result; |
| 138 | } |
| 139 | |
| 140 | private: |
Adrian Prantl | 9fc8faf | 2018-05-09 01:00:01 +0000 | [diff] [blame] | 141 | /// Gets all paths under this FileMatchTrieNode. |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 142 | void getAll(std::vector<StringRef> &Results, |
| 143 | llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const { |
| 144 | if (Path.empty()) |
| 145 | return; |
| 146 | if (Children.empty()) { |
| 147 | Results.push_back(StringRef(Path)); |
| 148 | return; |
| 149 | } |
| 150 | for (llvm::StringMap<FileMatchTrieNode>::const_iterator |
| 151 | It = Children.begin(), E = Children.end(); |
| 152 | It != E; ++It) { |
| 153 | if (It == Except) |
| 154 | continue; |
| 155 | It->getValue().getAll(Results, Children.end()); |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | // The stored absolute path in this node. Only valid for leaf nodes, i.e. |
| 160 | // nodes where Children.empty(). |
| 161 | std::string Path; |
| 162 | |
| 163 | // The children of this node stored in a map based on the next path segment. |
| 164 | llvm::StringMap<FileMatchTrieNode> Children; |
| 165 | }; |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 166 | |
| 167 | } // namespace tooling |
| 168 | } // namespace clang |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 169 | |
| 170 | FileMatchTrie::FileMatchTrie() |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 171 | : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {} |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 172 | |
| 173 | FileMatchTrie::FileMatchTrie(PathComparator *Comparator) |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 174 | : Root(new FileMatchTrieNode), Comparator(Comparator) {} |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 175 | |
| 176 | FileMatchTrie::~FileMatchTrie() { |
| 177 | delete Root; |
| 178 | } |
| 179 | |
| 180 | void FileMatchTrie::insert(StringRef NewPath) { |
| 181 | Root->insert(NewPath); |
| 182 | } |
| 183 | |
| 184 | StringRef FileMatchTrie::findEquivalent(StringRef FileName, |
Dmitri Gribenko | f857950 | 2013-01-12 19:30:44 +0000 | [diff] [blame] | 185 | raw_ostream &Error) const { |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 186 | if (llvm::sys::path::is_relative(FileName)) { |
| 187 | Error << "Cannot resolve relative paths"; |
Eugene Zelenko | 6366efe | 2018-03-14 21:05:51 +0000 | [diff] [blame] | 188 | return {}; |
Daniel Jasper | 26cf9c4 | 2012-10-08 16:08:15 +0000 | [diff] [blame] | 189 | } |
| 190 | bool IsAmbiguous = false; |
| 191 | StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous); |
| 192 | if (IsAmbiguous) |
| 193 | Error << "Path is ambiguous"; |
| 194 | return Result; |
| 195 | } |