1 | //===- FileMatchTrie.cpp --------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the implementation of a FileMatchTrie. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/Tooling/FileMatchTrie.h" |
14 | #include "llvm/ADT/StringMap.h" |
15 | #include "llvm/ADT/StringRef.h" |
16 | #include "llvm/Support/FileSystem.h" |
17 | #include "llvm/Support/Path.h" |
18 | #include <string> |
19 | #include <vector> |
20 | |
21 | using namespace clang; |
22 | using namespace tooling; |
23 | |
24 | namespace { |
25 | |
26 | /// Default \c PathComparator using \c llvm::sys::fs::equivalent(). |
27 | struct DefaultPathComparator : public PathComparator { |
28 | bool equivalent(StringRef FileA, StringRef FileB) const override { |
29 | return FileA == FileB || llvm::sys::fs::equivalent(A: FileA, B: FileB); |
30 | } |
31 | }; |
32 | |
33 | } // namespace |
34 | |
35 | namespace clang { |
36 | namespace tooling { |
37 | |
38 | /// A node of the \c FileMatchTrie. |
39 | /// |
40 | /// Each node has storage for up to one path and a map mapping a path segment to |
41 | /// child nodes. The trie starts with an empty root node. |
42 | class FileMatchTrieNode { |
43 | public: |
44 | /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes |
45 | /// the number of \c NewPath's trailing characters already consumed during |
46 | /// recursion. |
47 | /// |
48 | /// An insert of a path |
49 | /// 'p'starts at the root node and does the following: |
50 | /// - If the node is empty, insert 'p' into its storage and abort. |
51 | /// - If the node has a path 'p2' but no children, take the last path segment |
52 | /// 's' of 'p2', put a new child into the map at 's' an insert the rest of |
53 | /// 'p2' there. |
54 | /// - Insert a new child for the last segment of 'p' and insert the rest of |
55 | /// 'p' there. |
56 | /// |
57 | /// An insert operation is linear in the number of a path's segments. |
58 | void insert(StringRef NewPath, unsigned ConsumedLength = 0) { |
59 | // We cannot put relative paths into the FileMatchTrie as then a path can be |
60 | // a postfix of another path, violating a core assumption of the trie. |
61 | if (llvm::sys::path::is_relative(path: NewPath)) |
62 | return; |
63 | if (Path.empty()) { |
64 | // This is an empty leaf. Store NewPath and return. |
65 | Path = std::string(NewPath); |
66 | return; |
67 | } |
68 | if (Children.empty()) { |
69 | // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. |
70 | if (NewPath == Path) |
71 | return; |
72 | // Make this a node and create a child-leaf with 'Path'. |
73 | StringRef Element(llvm::sys::path::filename( |
74 | path: StringRef(Path).drop_back(N: ConsumedLength))); |
75 | Children[Element].Path = Path; |
76 | } |
77 | StringRef Element(llvm::sys::path::filename( |
78 | path: StringRef(NewPath).drop_back(N: ConsumedLength))); |
79 | Children[Element].insert(NewPath, ConsumedLength: ConsumedLength + Element.size() + 1); |
80 | } |
81 | |
82 | /// Tries to find the node under this \c FileMatchTrieNode that best |
83 | /// matches 'FileName'. |
84 | /// |
85 | /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to |
86 | /// \c true and an empty string is returned. If no path fits 'FileName', an |
87 | /// empty string is returned. \c ConsumedLength denotes the number of |
88 | /// \c Filename's trailing characters already consumed during recursion. |
89 | /// |
90 | /// To find the best matching node for a given path 'p', the |
91 | /// \c findEquivalent() function is called recursively for each path segment |
92 | /// (back to front) of 'p' until a node 'n' is reached that does not .. |
93 | /// - .. have children. In this case it is checked |
94 | /// whether the stored path is equivalent to 'p'. If yes, the best match is |
95 | /// found. Otherwise continue with the parent node as if this node did not |
96 | /// exist. |
97 | /// - .. a child matching the next path segment. In this case, all children of |
98 | /// 'n' are an equally good match for 'p'. All children are of 'n' are found |
99 | /// recursively and their equivalence to 'p' is determined. If none are |
100 | /// equivalent, continue with the parent node as if 'n' didn't exist. If one |
101 | /// is equivalent, the best match is found. Otherwise, report and ambigiuity |
102 | /// error. |
103 | StringRef findEquivalent(const PathComparator& Comparator, |
104 | StringRef FileName, |
105 | bool &IsAmbiguous, |
106 | unsigned ConsumedLength = 0) const { |
107 | // Note: we support only directory symlinks for performance reasons. |
108 | if (Children.empty()) { |
109 | // As far as we do not support file symlinks, compare |
110 | // basenames here to avoid request to file system. |
111 | if (llvm::sys::path::filename(path: Path) == |
112 | llvm::sys::path::filename(path: FileName) && |
113 | Comparator.equivalent(FileA: StringRef(Path), FileB: FileName)) |
114 | return StringRef(Path); |
115 | return {}; |
116 | } |
117 | StringRef Element(llvm::sys::path::filename(path: FileName.drop_back( |
118 | N: ConsumedLength))); |
119 | llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild = |
120 | Children.find(Key: Element); |
121 | if (MatchingChild != Children.end()) { |
122 | StringRef Result = MatchingChild->getValue().findEquivalent( |
123 | Comparator, FileName, IsAmbiguous, |
124 | ConsumedLength: ConsumedLength + Element.size() + 1); |
125 | if (!Result.empty() || IsAmbiguous) |
126 | return Result; |
127 | } |
128 | |
129 | // If `ConsumedLength` is zero, this is the root and we have no filename |
130 | // match. Give up in this case, we don't try to find symlinks with |
131 | // different names. |
132 | if (ConsumedLength == 0) |
133 | return {}; |
134 | |
135 | std::vector<StringRef> AllChildren; |
136 | getAll(Results&: AllChildren, Except: MatchingChild); |
137 | StringRef Result; |
138 | for (const auto &Child : AllChildren) { |
139 | if (Comparator.equivalent(FileA: Child, FileB: FileName)) { |
140 | if (Result.empty()) { |
141 | Result = Child; |
142 | } else { |
143 | IsAmbiguous = true; |
144 | return {}; |
145 | } |
146 | } |
147 | } |
148 | return Result; |
149 | } |
150 | |
151 | private: |
152 | /// Gets all paths under this FileMatchTrieNode. |
153 | void getAll(std::vector<StringRef> &Results, |
154 | llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const { |
155 | if (Path.empty()) |
156 | return; |
157 | if (Children.empty()) { |
158 | Results.push_back(x: StringRef(Path)); |
159 | return; |
160 | } |
161 | for (llvm::StringMap<FileMatchTrieNode>::const_iterator |
162 | It = Children.begin(), E = Children.end(); |
163 | It != E; ++It) { |
164 | if (It == Except) |
165 | continue; |
166 | It->getValue().getAll(Results, Except: Children.end()); |
167 | } |
168 | } |
169 | |
170 | // The stored absolute path in this node. Only valid for leaf nodes, i.e. |
171 | // nodes where Children.empty(). |
172 | std::string Path; |
173 | |
174 | // The children of this node stored in a map based on the next path segment. |
175 | llvm::StringMap<FileMatchTrieNode> Children; |
176 | }; |
177 | |
178 | } // namespace tooling |
179 | } // namespace clang |
180 | |
181 | FileMatchTrie::FileMatchTrie() |
182 | : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {} |
183 | |
184 | FileMatchTrie::FileMatchTrie(PathComparator *Comparator) |
185 | : Root(new FileMatchTrieNode), Comparator(Comparator) {} |
186 | |
187 | FileMatchTrie::~FileMatchTrie() { |
188 | delete Root; |
189 | } |
190 | |
191 | void FileMatchTrie::insert(StringRef NewPath) { |
192 | Root->insert(NewPath); |
193 | } |
194 | |
195 | StringRef FileMatchTrie::findEquivalent(StringRef FileName, |
196 | raw_ostream &Error) const { |
197 | if (llvm::sys::path::is_relative(path: FileName)) { |
198 | Error << "Cannot resolve relative paths" ; |
199 | return {}; |
200 | } |
201 | bool IsAmbiguous = false; |
202 | StringRef Result = Root->findEquivalent(Comparator: *Comparator, FileName, IsAmbiguous); |
203 | if (IsAmbiguous) |
204 | Error << "Path is ambiguous" ; |
205 | return Result; |
206 | } |
207 | |