1 | //===-- FileSpec.cpp ------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "lldb/Utility/FileSpec.h" |
10 | #include "lldb/Utility/RegularExpression.h" |
11 | #include "lldb/Utility/Stream.h" |
12 | |
13 | #include "llvm/ADT/SmallString.h" |
14 | #include "llvm/ADT/SmallVector.h" |
15 | #include "llvm/ADT/StringExtras.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/ADT/Twine.h" |
18 | #include "llvm/Support/ErrorOr.h" |
19 | #include "llvm/Support/FileSystem.h" |
20 | #include "llvm/Support/Program.h" |
21 | #include "llvm/Support/raw_ostream.h" |
22 | #include "llvm/TargetParser/Triple.h" |
23 | |
24 | #include <algorithm> |
25 | #include <optional> |
26 | #include <system_error> |
27 | #include <vector> |
28 | |
29 | #include <cassert> |
30 | #include <climits> |
31 | #include <cstdio> |
32 | #include <cstring> |
33 | |
34 | using namespace lldb; |
35 | using namespace lldb_private; |
36 | |
37 | namespace { |
38 | |
39 | static constexpr FileSpec::Style GetNativeStyle() { |
40 | #if defined(_WIN32) |
41 | return FileSpec::Style::windows; |
42 | #else |
43 | return FileSpec::Style::posix; |
44 | #endif |
45 | } |
46 | |
47 | bool PathStyleIsPosix(FileSpec::Style style) { |
48 | return llvm::sys::path::is_style_posix(S: style); |
49 | } |
50 | |
51 | const char *GetPathSeparators(FileSpec::Style style) { |
52 | return llvm::sys::path::get_separator(style).data(); |
53 | } |
54 | |
55 | char GetPreferredPathSeparator(FileSpec::Style style) { |
56 | return GetPathSeparators(style)[0]; |
57 | } |
58 | |
59 | void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) { |
60 | if (PathStyleIsPosix(style)) |
61 | return; |
62 | |
63 | std::replace(first: path.begin(), last: path.end(), old_value: '/', new_value: '\\'); |
64 | } |
65 | |
66 | } // end anonymous namespace |
67 | |
68 | FileSpec::FileSpec() : m_style(GetNativeStyle()) {} |
69 | |
70 | // Default constructor that can take an optional full path to a file on disk. |
71 | FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) { |
72 | SetFile(path, style); |
73 | } |
74 | |
75 | FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple) |
76 | : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {} |
77 | |
78 | namespace { |
79 | /// Safely get a character at the specified index. |
80 | /// |
81 | /// \param[in] path |
82 | /// A full, partial, or relative path to a file. |
83 | /// |
84 | /// \param[in] i |
85 | /// An index into path which may or may not be valid. |
86 | /// |
87 | /// \return |
88 | /// The character at index \a i if the index is valid, or 0 if |
89 | /// the index is not valid. |
90 | inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) { |
91 | if (i < path.size()) |
92 | return path[i]; |
93 | return 0; |
94 | } |
95 | |
96 | /// Check if a path needs to be normalized. |
97 | /// |
98 | /// Check if a path needs to be normalized. We currently consider a |
99 | /// path to need normalization if any of the following are true |
100 | /// - path contains "/./" |
101 | /// - path contains "/../" |
102 | /// - path contains "//" |
103 | /// - path ends with "/" |
104 | /// Paths that start with "./" or with "../" are not considered to |
105 | /// need normalization since we aren't trying to resolve the path, |
106 | /// we are just trying to remove redundant things from the path. |
107 | /// |
108 | /// \param[in] path |
109 | /// A full, partial, or relative path to a file. |
110 | /// |
111 | /// \return |
112 | /// Returns \b true if the path needs to be normalized. |
113 | bool needsNormalization(const llvm::StringRef &path) { |
114 | if (path.empty()) |
115 | return false; |
116 | // We strip off leading "." values so these paths need to be normalized |
117 | if (path[0] == '.') |
118 | return true; |
119 | for (auto i = path.find_first_of(Chars: "\\/" ); i != llvm::StringRef::npos; |
120 | i = path.find_first_of(Chars: "\\/" , From: i + 1)) { |
121 | const auto next = safeCharAtIndex(path, i: i+1); |
122 | switch (next) { |
123 | case 0: |
124 | // path separator char at the end of the string which should be |
125 | // stripped unless it is the one and only character |
126 | return i > 0; |
127 | case '/': |
128 | case '\\': |
129 | // two path separator chars in the middle of a path needs to be |
130 | // normalized |
131 | if (i > 0) |
132 | return true; |
133 | ++i; |
134 | break; |
135 | |
136 | case '.': { |
137 | const auto next_next = safeCharAtIndex(path, i: i+2); |
138 | switch (next_next) { |
139 | default: break; |
140 | case 0: return true; // ends with "/." |
141 | case '/': |
142 | case '\\': |
143 | return true; // contains "/./" |
144 | case '.': { |
145 | const auto next_next_next = safeCharAtIndex(path, i: i+3); |
146 | switch (next_next_next) { |
147 | default: break; |
148 | case 0: return true; // ends with "/.." |
149 | case '/': |
150 | case '\\': |
151 | return true; // contains "/../" |
152 | } |
153 | break; |
154 | } |
155 | } |
156 | } |
157 | break; |
158 | |
159 | default: |
160 | break; |
161 | } |
162 | } |
163 | return false; |
164 | } |
165 | |
166 | |
167 | } |
168 | |
169 | void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(path: pathname, style: m_style); } |
170 | |
171 | // Update the contents of this object with a new path. The path will be split |
172 | // up into a directory and filename and stored as uniqued string values for |
173 | // quick comparison and efficient memory usage. |
174 | void FileSpec::SetFile(llvm::StringRef pathname, Style style) { |
175 | Clear(); |
176 | m_style = (style == Style::native) ? GetNativeStyle() : style; |
177 | |
178 | if (pathname.empty()) |
179 | return; |
180 | |
181 | llvm::SmallString<128> resolved(pathname); |
182 | |
183 | // Normalize the path by removing ".", ".." and other redundant components. |
184 | if (needsNormalization(path: resolved)) |
185 | llvm::sys::path::remove_dots(path&: resolved, remove_dot_dot: true, style: m_style); |
186 | |
187 | // Normalize back slashes to forward slashes |
188 | if (m_style == Style::windows) |
189 | std::replace(first: resolved.begin(), last: resolved.end(), old_value: '\\', new_value: '/'); |
190 | |
191 | if (resolved.empty()) { |
192 | // If we have no path after normalization set the path to the current |
193 | // directory. This matches what python does and also a few other path |
194 | // utilities. |
195 | m_filename.SetString("." ); |
196 | return; |
197 | } |
198 | |
199 | // Split path into filename and directory. We rely on the underlying char |
200 | // pointer to be nullptr when the components are empty. |
201 | llvm::StringRef filename = llvm::sys::path::filename(path: resolved, style: m_style); |
202 | if(!filename.empty()) |
203 | m_filename.SetString(filename); |
204 | |
205 | llvm::StringRef directory = llvm::sys::path::parent_path(path: resolved, style: m_style); |
206 | if(!directory.empty()) |
207 | m_directory.SetString(directory); |
208 | } |
209 | |
210 | void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) { |
211 | return SetFile(pathname: path, style: triple.isOSWindows() ? Style::windows : Style::posix); |
212 | } |
213 | |
214 | // Convert to pointer operator. This allows code to check any FileSpec objects |
215 | // to see if they contain anything valid using code such as: |
216 | // |
217 | // if (file_spec) |
218 | // {} |
219 | FileSpec::operator bool() const { return m_filename || m_directory; } |
220 | |
221 | // Logical NOT operator. This allows code to check any FileSpec objects to see |
222 | // if they are invalid using code such as: |
223 | // |
224 | // if (!file_spec) |
225 | // {} |
226 | bool FileSpec::operator!() const { return !m_directory && !m_filename; } |
227 | |
228 | bool FileSpec::DirectoryEquals(const FileSpec &rhs) const { |
229 | const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive(); |
230 | return ConstString::Equals(lhs: m_directory, rhs: rhs.m_directory, case_sensitive); |
231 | } |
232 | |
233 | bool FileSpec::FileEquals(const FileSpec &rhs) const { |
234 | const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive(); |
235 | return ConstString::Equals(lhs: m_filename, rhs: rhs.m_filename, case_sensitive); |
236 | } |
237 | |
238 | // Equal to operator |
239 | bool FileSpec::operator==(const FileSpec &rhs) const { |
240 | return FileEquals(rhs) && DirectoryEquals(rhs); |
241 | } |
242 | |
243 | // Not equal to operator |
244 | bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); } |
245 | |
246 | // Less than operator |
247 | bool FileSpec::operator<(const FileSpec &rhs) const { |
248 | return FileSpec::Compare(lhs: *this, rhs, full: true) < 0; |
249 | } |
250 | |
251 | // Dump a FileSpec object to a stream |
252 | Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) { |
253 | f.Dump(s&: s.AsRawOstream()); |
254 | return s; |
255 | } |
256 | |
257 | // Clear this object by releasing both the directory and filename string values |
258 | // and making them both the empty string. |
259 | void FileSpec::Clear() { |
260 | m_directory.Clear(); |
261 | m_filename.Clear(); |
262 | PathWasModified(); |
263 | } |
264 | |
265 | // Compare two FileSpec objects. If "full" is true, then both the directory and |
266 | // the filename must match. If "full" is false, then the directory names for |
267 | // "a" and "b" are only compared if they are both non-empty. This allows a |
268 | // FileSpec object to only contain a filename and it can match FileSpec objects |
269 | // that have matching filenames with different paths. |
270 | // |
271 | // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if |
272 | // "a" is greater than "b". |
273 | int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) { |
274 | int result = 0; |
275 | |
276 | // case sensitivity of compare |
277 | const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive(); |
278 | |
279 | // If full is true, then we must compare both the directory and filename. |
280 | |
281 | // If full is false, then if either directory is empty, then we match on the |
282 | // basename only, and if both directories have valid values, we still do a |
283 | // full compare. This allows for matching when we just have a filename in one |
284 | // of the FileSpec objects. |
285 | |
286 | if (full || (a.m_directory && b.m_directory)) { |
287 | result = ConstString::Compare(lhs: a.m_directory, rhs: b.m_directory, case_sensitive); |
288 | if (result) |
289 | return result; |
290 | } |
291 | return ConstString::Compare(lhs: a.m_filename, rhs: b.m_filename, case_sensitive); |
292 | } |
293 | |
294 | bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) { |
295 | if (full || (a.GetDirectory() && b.GetDirectory())) |
296 | return a == b; |
297 | |
298 | return a.FileEquals(rhs: b); |
299 | } |
300 | |
301 | bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) { |
302 | if (pattern.GetDirectory()) |
303 | return pattern == file; |
304 | if (pattern.GetFilename()) |
305 | return pattern.FileEquals(rhs: file); |
306 | return true; |
307 | } |
308 | |
309 | std::optional<FileSpec::Style> |
310 | FileSpec::GuessPathStyle(llvm::StringRef absolute_path) { |
311 | if (absolute_path.starts_with(Prefix: "/" )) |
312 | return Style::posix; |
313 | if (absolute_path.starts_with(Prefix: R"(\\)" )) |
314 | return Style::windows; |
315 | if (absolute_path.size() >= 3 && llvm::isAlpha(C: absolute_path[0]) && |
316 | (absolute_path.substr(Start: 1, N: 2) == R"(:\)" || |
317 | absolute_path.substr(Start: 1, N: 2) == R"(:/)" )) |
318 | return Style::windows; |
319 | return std::nullopt; |
320 | } |
321 | |
322 | // Dump the object to the supplied stream. If the object contains a valid |
323 | // directory name, it will be displayed followed by a directory delimiter, and |
324 | // the filename. |
325 | void FileSpec::Dump(llvm::raw_ostream &s) const { |
326 | std::string path{GetPath(denormalize: true)}; |
327 | s << path; |
328 | char path_separator = GetPreferredPathSeparator(style: m_style); |
329 | if (!m_filename && !path.empty() && path.back() != path_separator) |
330 | s << path_separator; |
331 | } |
332 | |
333 | FileSpec::Style FileSpec::GetPathStyle() const { return m_style; } |
334 | |
335 | void FileSpec::SetDirectory(ConstString directory) { |
336 | m_directory = directory; |
337 | PathWasModified(); |
338 | } |
339 | |
340 | void FileSpec::SetDirectory(llvm::StringRef directory) { |
341 | m_directory = ConstString(directory); |
342 | PathWasModified(); |
343 | } |
344 | |
345 | void FileSpec::SetFilename(ConstString filename) { |
346 | m_filename = filename; |
347 | PathWasModified(); |
348 | } |
349 | |
350 | void FileSpec::SetFilename(llvm::StringRef filename) { |
351 | m_filename = ConstString(filename); |
352 | PathWasModified(); |
353 | } |
354 | |
355 | void FileSpec::ClearFilename() { |
356 | m_filename.Clear(); |
357 | PathWasModified(); |
358 | } |
359 | |
360 | void FileSpec::ClearDirectory() { |
361 | m_directory.Clear(); |
362 | PathWasModified(); |
363 | } |
364 | |
365 | // Extract the directory and path into a fixed buffer. This is needed as the |
366 | // directory and path are stored in separate string values. |
367 | size_t FileSpec::GetPath(char *path, size_t path_max_len, |
368 | bool denormalize) const { |
369 | if (!path) |
370 | return 0; |
371 | |
372 | std::string result = GetPath(denormalize); |
373 | ::snprintf(s: path, maxlen: path_max_len, format: "%s" , result.c_str()); |
374 | return std::min(a: path_max_len - 1, b: result.length()); |
375 | } |
376 | |
377 | std::string FileSpec::GetPath(bool denormalize) const { |
378 | llvm::SmallString<64> result; |
379 | GetPath(path&: result, denormalize); |
380 | return static_cast<std::string>(result); |
381 | } |
382 | |
383 | ConstString FileSpec::GetPathAsConstString(bool denormalize) const { |
384 | return ConstString{GetPath(denormalize)}; |
385 | } |
386 | |
387 | void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path, |
388 | bool denormalize) const { |
389 | path.append(in_start: m_directory.GetStringRef().begin(), |
390 | in_end: m_directory.GetStringRef().end()); |
391 | // Since the path was normalized and all paths use '/' when stored in these |
392 | // objects, we don't need to look for the actual syntax specific path |
393 | // separator, we just look for and insert '/'. |
394 | if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' && |
395 | m_filename.GetStringRef().back() != '/') |
396 | path.insert(I: path.end(), Elt: '/'); |
397 | path.append(in_start: m_filename.GetStringRef().begin(), |
398 | in_end: m_filename.GetStringRef().end()); |
399 | if (denormalize && !path.empty()) |
400 | Denormalize(path, style: m_style); |
401 | } |
402 | |
403 | llvm::StringRef FileSpec::GetFileNameExtension() const { |
404 | return llvm::sys::path::extension(path: m_filename.GetStringRef(), style: m_style); |
405 | } |
406 | |
407 | ConstString FileSpec::GetFileNameStrippingExtension() const { |
408 | return ConstString(llvm::sys::path::stem(path: m_filename.GetStringRef(), style: m_style)); |
409 | } |
410 | |
411 | // Return the size in bytes that this object takes in memory. This returns the |
412 | // size in bytes of this object, not any shared string values it may refer to. |
413 | size_t FileSpec::MemorySize() const { |
414 | return m_filename.MemorySize() + m_directory.MemorySize(); |
415 | } |
416 | |
417 | FileSpec |
418 | FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const { |
419 | FileSpec ret = *this; |
420 | ret.AppendPathComponent(component); |
421 | return ret; |
422 | } |
423 | |
424 | FileSpec FileSpec::CopyByRemovingLastPathComponent() const { |
425 | llvm::SmallString<64> current_path; |
426 | GetPath(path&: current_path, denormalize: false); |
427 | if (llvm::sys::path::has_parent_path(path: current_path, style: m_style)) |
428 | return FileSpec(llvm::sys::path::parent_path(path: current_path, style: m_style), |
429 | m_style); |
430 | return *this; |
431 | } |
432 | |
433 | void FileSpec::PrependPathComponent(llvm::StringRef component) { |
434 | llvm::SmallString<64> new_path(component); |
435 | llvm::SmallString<64> current_path; |
436 | GetPath(path&: current_path, denormalize: false); |
437 | llvm::sys::path::append(path&: new_path, |
438 | begin: llvm::sys::path::begin(path: current_path, style: m_style), |
439 | end: llvm::sys::path::end(path: current_path), style: m_style); |
440 | SetFile(pathname: new_path, style: m_style); |
441 | } |
442 | |
443 | void FileSpec::PrependPathComponent(const FileSpec &new_path) { |
444 | return PrependPathComponent(component: new_path.GetPath(denormalize: false)); |
445 | } |
446 | |
447 | void FileSpec::AppendPathComponent(llvm::StringRef component) { |
448 | llvm::SmallString<64> current_path; |
449 | GetPath(path&: current_path, denormalize: false); |
450 | llvm::sys::path::append(path&: current_path, style: m_style, a: component); |
451 | SetFile(pathname: current_path, style: m_style); |
452 | } |
453 | |
454 | void FileSpec::AppendPathComponent(const FileSpec &new_path) { |
455 | return AppendPathComponent(component: new_path.GetPath(denormalize: false)); |
456 | } |
457 | |
458 | bool FileSpec::RemoveLastPathComponent() { |
459 | llvm::SmallString<64> current_path; |
460 | GetPath(path&: current_path, denormalize: false); |
461 | if (llvm::sys::path::has_parent_path(path: current_path, style: m_style)) { |
462 | SetFile(llvm::sys::path::parent_path(path: current_path, style: m_style)); |
463 | return true; |
464 | } |
465 | return false; |
466 | } |
467 | |
468 | std::vector<llvm::StringRef> FileSpec::GetComponents() const { |
469 | std::vector<llvm::StringRef> components; |
470 | |
471 | auto dir_begin = llvm::sys::path::begin(path: m_directory.GetStringRef(), style: m_style); |
472 | auto dir_end = llvm::sys::path::end(path: m_directory.GetStringRef()); |
473 | |
474 | for (auto iter = dir_begin; iter != dir_end; ++iter) { |
475 | if (*iter == "/" || *iter == "." ) |
476 | continue; |
477 | |
478 | components.push_back(x: *iter); |
479 | } |
480 | |
481 | if (!m_filename.IsEmpty() && m_filename != "/" && m_filename != "." ) |
482 | components.push_back(x: m_filename.GetStringRef()); |
483 | |
484 | return components; |
485 | } |
486 | |
487 | /// Returns true if the filespec represents an implementation source |
488 | /// file (files with a ".c", ".cpp", ".m", ".mm" (many more) |
489 | /// extension). |
490 | /// |
491 | /// \return |
492 | /// \b true if the filespec represents an implementation source |
493 | /// file, \b false otherwise. |
494 | bool FileSpec::IsSourceImplementationFile() const { |
495 | llvm::StringRef extension = GetFileNameExtension(); |
496 | if (extension.empty()) |
497 | return false; |
498 | |
499 | static RegularExpression g_source_file_regex(llvm::StringRef( |
500 | "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|[" |
501 | "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO][" |
502 | "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])" |
503 | "$" )); |
504 | return g_source_file_regex.Execute(string: extension); |
505 | } |
506 | |
507 | bool FileSpec::IsRelative() const { |
508 | return !IsAbsolute(); |
509 | } |
510 | |
511 | bool FileSpec::IsAbsolute() const { |
512 | // Check if we have cached if this path is absolute to avoid recalculating. |
513 | if (m_absolute != Absolute::Calculate) |
514 | return m_absolute == Absolute::Yes; |
515 | |
516 | m_absolute = Absolute::No; |
517 | |
518 | llvm::SmallString<64> path; |
519 | GetPath(path, denormalize: false); |
520 | |
521 | if (!path.empty()) { |
522 | // We consider paths starting with ~ to be absolute. |
523 | if (path[0] == '~' || llvm::sys::path::is_absolute(path, style: m_style)) |
524 | m_absolute = Absolute::Yes; |
525 | } |
526 | |
527 | return m_absolute == Absolute::Yes; |
528 | } |
529 | |
530 | void FileSpec::MakeAbsolute(const FileSpec &dir) { |
531 | if (IsRelative()) |
532 | PrependPathComponent(new_path: dir); |
533 | } |
534 | |
535 | void llvm::format_provider<FileSpec>::format(const FileSpec &F, |
536 | raw_ostream &Stream, |
537 | StringRef Style) { |
538 | assert((Style.empty() || Style.equals_insensitive("F" ) || |
539 | Style.equals_insensitive("D" )) && |
540 | "Invalid FileSpec style!" ); |
541 | |
542 | StringRef dir = F.GetDirectory().GetStringRef(); |
543 | StringRef file = F.GetFilename().GetStringRef(); |
544 | |
545 | if (dir.empty() && file.empty()) { |
546 | Stream << "(empty)" ; |
547 | return; |
548 | } |
549 | |
550 | if (Style.equals_insensitive(RHS: "F" )) { |
551 | Stream << (file.empty() ? "(empty)" : file); |
552 | return; |
553 | } |
554 | |
555 | // Style is either D or empty, either way we need to print the directory. |
556 | if (!dir.empty()) { |
557 | // Directory is stored in normalized form, which might be different than |
558 | // preferred form. In order to handle this, we need to cut off the |
559 | // filename, then denormalize, then write the entire denorm'ed directory. |
560 | llvm::SmallString<64> denormalized_dir = dir; |
561 | Denormalize(path&: denormalized_dir, style: F.GetPathStyle()); |
562 | Stream << denormalized_dir; |
563 | Stream << GetPreferredPathSeparator(style: F.GetPathStyle()); |
564 | } |
565 | |
566 | if (Style.equals_insensitive(RHS: "D" )) { |
567 | // We only want to print the directory, so now just exit. |
568 | if (dir.empty()) |
569 | Stream << "(empty)" ; |
570 | return; |
571 | } |
572 | |
573 | if (!file.empty()) |
574 | Stream << file; |
575 | } |
576 | |