1use std::{
2 cmp::Ordering,
3 ffi::OsStr,
4 fs::{self, FileType, Metadata},
5 io,
6 path::{Path, PathBuf},
7 sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering},
8 sync::Arc,
9};
10
11use {
12 crossbeam_deque::{Stealer, Worker as Deque},
13 same_file::Handle,
14 walkdir::WalkDir,
15};
16
17use crate::{
18 dir::{Ignore, IgnoreBuilder},
19 gitignore::GitignoreBuilder,
20 overrides::Override,
21 types::Types,
22 Error, PartialErrorBuilder,
23};
24
25/// A directory entry with a possible error attached.
26///
27/// The error typically refers to a problem parsing ignore files in a
28/// particular directory.
29#[derive(Clone, Debug)]
30pub struct DirEntry {
31 dent: DirEntryInner,
32 err: Option<Error>,
33}
34
35impl DirEntry {
36 /// The full path that this entry represents.
37 pub fn path(&self) -> &Path {
38 self.dent.path()
39 }
40
41 /// The full path that this entry represents.
42 /// Analogous to [`DirEntry::path`], but moves ownership of the path.
43 pub fn into_path(self) -> PathBuf {
44 self.dent.into_path()
45 }
46
47 /// Whether this entry corresponds to a symbolic link or not.
48 pub fn path_is_symlink(&self) -> bool {
49 self.dent.path_is_symlink()
50 }
51
52 /// Returns true if and only if this entry corresponds to stdin.
53 ///
54 /// i.e., The entry has depth 0 and its file name is `-`.
55 pub fn is_stdin(&self) -> bool {
56 self.dent.is_stdin()
57 }
58
59 /// Return the metadata for the file that this entry points to.
60 pub fn metadata(&self) -> Result<Metadata, Error> {
61 self.dent.metadata()
62 }
63
64 /// Return the file type for the file that this entry points to.
65 ///
66 /// This entry doesn't have a file type if it corresponds to stdin.
67 pub fn file_type(&self) -> Option<FileType> {
68 self.dent.file_type()
69 }
70
71 /// Return the file name of this entry.
72 ///
73 /// If this entry has no file name (e.g., `/`), then the full path is
74 /// returned.
75 pub fn file_name(&self) -> &OsStr {
76 self.dent.file_name()
77 }
78
79 /// Returns the depth at which this entry was created relative to the root.
80 pub fn depth(&self) -> usize {
81 self.dent.depth()
82 }
83
84 /// Returns the underlying inode number if one exists.
85 ///
86 /// If this entry doesn't have an inode number, then `None` is returned.
87 #[cfg(unix)]
88 pub fn ino(&self) -> Option<u64> {
89 self.dent.ino()
90 }
91
92 /// Returns an error, if one exists, associated with processing this entry.
93 ///
94 /// An example of an error is one that occurred while parsing an ignore
95 /// file. Errors related to traversing a directory tree itself are reported
96 /// as part of yielding the directory entry, and not with this method.
97 pub fn error(&self) -> Option<&Error> {
98 self.err.as_ref()
99 }
100
101 /// Returns true if and only if this entry points to a directory.
102 pub(crate) fn is_dir(&self) -> bool {
103 self.dent.is_dir()
104 }
105
106 fn new_stdin() -> DirEntry {
107 DirEntry { dent: DirEntryInner::Stdin, err: None }
108 }
109
110 fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
111 DirEntry { dent: DirEntryInner::Walkdir(dent), err }
112 }
113
114 fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
115 DirEntry { dent: DirEntryInner::Raw(dent), err }
116 }
117}
118
119/// DirEntryInner is the implementation of DirEntry.
120///
121/// It specifically represents three distinct sources of directory entries:
122///
123/// 1. From the walkdir crate.
124/// 2. Special entries that represent things like stdin.
125/// 3. From a path.
126///
127/// Specifically, (3) has to essentially re-create the DirEntry implementation
128/// from WalkDir.
129#[derive(Clone, Debug)]
130enum DirEntryInner {
131 Stdin,
132 Walkdir(walkdir::DirEntry),
133 Raw(DirEntryRaw),
134}
135
136impl DirEntryInner {
137 fn path(&self) -> &Path {
138 use self::DirEntryInner::*;
139 match *self {
140 Stdin => Path::new("<stdin>"),
141 Walkdir(ref x) => x.path(),
142 Raw(ref x) => x.path(),
143 }
144 }
145
146 fn into_path(self) -> PathBuf {
147 use self::DirEntryInner::*;
148 match self {
149 Stdin => PathBuf::from("<stdin>"),
150 Walkdir(x) => x.into_path(),
151 Raw(x) => x.into_path(),
152 }
153 }
154
155 fn path_is_symlink(&self) -> bool {
156 use self::DirEntryInner::*;
157 match *self {
158 Stdin => false,
159 Walkdir(ref x) => x.path_is_symlink(),
160 Raw(ref x) => x.path_is_symlink(),
161 }
162 }
163
164 fn is_stdin(&self) -> bool {
165 match *self {
166 DirEntryInner::Stdin => true,
167 _ => false,
168 }
169 }
170
171 fn metadata(&self) -> Result<Metadata, Error> {
172 use self::DirEntryInner::*;
173 match *self {
174 Stdin => {
175 let err = Error::Io(io::Error::new(
176 io::ErrorKind::Other,
177 "<stdin> has no metadata",
178 ));
179 Err(err.with_path("<stdin>"))
180 }
181 Walkdir(ref x) => x.metadata().map_err(|err| {
182 Error::Io(io::Error::from(err)).with_path(x.path())
183 }),
184 Raw(ref x) => x.metadata(),
185 }
186 }
187
188 fn file_type(&self) -> Option<FileType> {
189 use self::DirEntryInner::*;
190 match *self {
191 Stdin => None,
192 Walkdir(ref x) => Some(x.file_type()),
193 Raw(ref x) => Some(x.file_type()),
194 }
195 }
196
197 fn file_name(&self) -> &OsStr {
198 use self::DirEntryInner::*;
199 match *self {
200 Stdin => OsStr::new("<stdin>"),
201 Walkdir(ref x) => x.file_name(),
202 Raw(ref x) => x.file_name(),
203 }
204 }
205
206 fn depth(&self) -> usize {
207 use self::DirEntryInner::*;
208 match *self {
209 Stdin => 0,
210 Walkdir(ref x) => x.depth(),
211 Raw(ref x) => x.depth(),
212 }
213 }
214
215 #[cfg(unix)]
216 fn ino(&self) -> Option<u64> {
217 use self::DirEntryInner::*;
218 use walkdir::DirEntryExt;
219 match *self {
220 Stdin => None,
221 Walkdir(ref x) => Some(x.ino()),
222 Raw(ref x) => Some(x.ino()),
223 }
224 }
225
226 /// Returns true if and only if this entry points to a directory.
227 fn is_dir(&self) -> bool {
228 self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
229 }
230}
231
232/// DirEntryRaw is essentially copied from the walkdir crate so that we can
233/// build `DirEntry`s from whole cloth in the parallel iterator.
234#[derive(Clone)]
235struct DirEntryRaw {
236 /// The path as reported by the `fs::ReadDir` iterator (even if it's a
237 /// symbolic link).
238 path: PathBuf,
239 /// The file type. Necessary for recursive iteration, so store it.
240 ty: FileType,
241 /// Is set when this entry was created from a symbolic link and the user
242 /// expects the iterator to follow symbolic links.
243 follow_link: bool,
244 /// The depth at which this entry was generated relative to the root.
245 depth: usize,
246 /// The underlying inode number (Unix only).
247 #[cfg(unix)]
248 ino: u64,
249 /// The underlying metadata (Windows only). We store this on Windows
250 /// because this comes for free while reading a directory.
251 #[cfg(windows)]
252 metadata: fs::Metadata,
253}
254
255impl std::fmt::Debug for DirEntryRaw {
256 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
257 // Leaving out FileType because it doesn't have a debug impl
258 // in Rust 1.9. We could add it if we really wanted to by manually
259 // querying each possibly file type. Meh. ---AG
260 f&mut DebugStruct<'_, '_>.debug_struct("DirEntryRaw")
261 .field("path", &self.path)
262 .field("follow_link", &self.follow_link)
263 .field(name:"depth", &self.depth)
264 .finish()
265 }
266}
267
268impl DirEntryRaw {
269 fn path(&self) -> &Path {
270 &self.path
271 }
272
273 fn into_path(self) -> PathBuf {
274 self.path
275 }
276
277 fn path_is_symlink(&self) -> bool {
278 self.ty.is_symlink() || self.follow_link
279 }
280
281 fn metadata(&self) -> Result<Metadata, Error> {
282 self.metadata_internal()
283 }
284
285 #[cfg(windows)]
286 fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
287 if self.follow_link {
288 fs::metadata(&self.path)
289 } else {
290 Ok(self.metadata.clone())
291 }
292 .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
293 }
294
295 #[cfg(not(windows))]
296 fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
297 if self.follow_link {
298 fs::metadata(&self.path)
299 } else {
300 fs::symlink_metadata(&self.path)
301 }
302 .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
303 }
304
305 fn file_type(&self) -> FileType {
306 self.ty
307 }
308
309 fn file_name(&self) -> &OsStr {
310 self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
311 }
312
313 fn depth(&self) -> usize {
314 self.depth
315 }
316
317 #[cfg(unix)]
318 fn ino(&self) -> u64 {
319 self.ino
320 }
321
322 fn from_entry(
323 depth: usize,
324 ent: &fs::DirEntry,
325 ) -> Result<DirEntryRaw, Error> {
326 let ty = ent.file_type().map_err(|err| {
327 let err = Error::Io(io::Error::from(err)).with_path(ent.path());
328 Error::WithDepth { depth, err: Box::new(err) }
329 })?;
330 DirEntryRaw::from_entry_os(depth, ent, ty)
331 }
332
333 #[cfg(windows)]
334 fn from_entry_os(
335 depth: usize,
336 ent: &fs::DirEntry,
337 ty: fs::FileType,
338 ) -> Result<DirEntryRaw, Error> {
339 let md = ent.metadata().map_err(|err| {
340 let err = Error::Io(io::Error::from(err)).with_path(ent.path());
341 Error::WithDepth { depth, err: Box::new(err) }
342 })?;
343 Ok(DirEntryRaw {
344 path: ent.path(),
345 ty,
346 follow_link: false,
347 depth,
348 metadata: md,
349 })
350 }
351
352 #[cfg(unix)]
353 fn from_entry_os(
354 depth: usize,
355 ent: &fs::DirEntry,
356 ty: fs::FileType,
357 ) -> Result<DirEntryRaw, Error> {
358 use std::os::unix::fs::DirEntryExt;
359
360 Ok(DirEntryRaw {
361 path: ent.path(),
362 ty,
363 follow_link: false,
364 depth,
365 ino: ent.ino(),
366 })
367 }
368
369 // Placeholder implementation to allow compiling on non-standard platforms
370 // (e.g. wasm32).
371 #[cfg(not(any(windows, unix)))]
372 fn from_entry_os(
373 depth: usize,
374 ent: &fs::DirEntry,
375 ty: fs::FileType,
376 ) -> Result<DirEntryRaw, Error> {
377 Err(Error::Io(io::Error::new(
378 io::ErrorKind::Other,
379 "unsupported platform",
380 )))
381 }
382
383 #[cfg(windows)]
384 fn from_path(
385 depth: usize,
386 pb: PathBuf,
387 link: bool,
388 ) -> Result<DirEntryRaw, Error> {
389 let md =
390 fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
391 Ok(DirEntryRaw {
392 path: pb,
393 ty: md.file_type(),
394 follow_link: link,
395 depth,
396 metadata: md,
397 })
398 }
399
400 #[cfg(unix)]
401 fn from_path(
402 depth: usize,
403 pb: PathBuf,
404 link: bool,
405 ) -> Result<DirEntryRaw, Error> {
406 use std::os::unix::fs::MetadataExt;
407
408 let md =
409 fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
410 Ok(DirEntryRaw {
411 path: pb,
412 ty: md.file_type(),
413 follow_link: link,
414 depth,
415 ino: md.ino(),
416 })
417 }
418
419 // Placeholder implementation to allow compiling on non-standard platforms
420 // (e.g. wasm32).
421 #[cfg(not(any(windows, unix)))]
422 fn from_path(
423 depth: usize,
424 pb: PathBuf,
425 link: bool,
426 ) -> Result<DirEntryRaw, Error> {
427 Err(Error::Io(io::Error::new(
428 io::ErrorKind::Other,
429 "unsupported platform",
430 )))
431 }
432}
433
434/// WalkBuilder builds a recursive directory iterator.
435///
436/// The builder supports a large number of configurable options. This includes
437/// specific glob overrides, file type matching, toggling whether hidden
438/// files are ignored or not, and of course, support for respecting gitignore
439/// files.
440///
441/// By default, all ignore files found are respected. This includes `.ignore`,
442/// `.gitignore`, `.git/info/exclude` and even your global gitignore
443/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
444///
445/// Some standard recursive directory options are also supported, such as
446/// limiting the recursive depth or whether to follow symbolic links (disabled
447/// by default).
448///
449/// # Ignore rules
450///
451/// There are many rules that influence whether a particular file or directory
452/// is skipped by this iterator. Those rules are documented here. Note that
453/// the rules assume a default configuration.
454///
455/// * First, glob overrides are checked. If a path matches a glob override,
456/// then matching stops. The path is then only skipped if the glob that matched
457/// the path is an ignore glob. (An override glob is a whitelist glob unless it
458/// starts with a `!`, in which case it is an ignore glob.)
459/// * Second, ignore files are checked. Ignore files currently only come from
460/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
461/// global gitignore file), plain `.ignore` files, which have the same format
462/// as gitignore files, or explicitly added ignore files. The precedence order
463/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
464/// finally explicitly added ignore files. Note that precedence between
465/// different types of ignore files is not impacted by the directory hierarchy;
466/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
467/// level, more nested ignore files have a higher precedence than less nested
468/// ignore files.
469/// * Third, if the previous step yields an ignore match, then all matching
470/// is stopped and the path is skipped. If it yields a whitelist match, then
471/// matching continues. A whitelist match can be overridden by a later matcher.
472/// * Fourth, unless the path is a directory, the file type matcher is run on
473/// the path. As above, if it yields an ignore match, then all matching is
474/// stopped and the path is skipped. If it yields a whitelist match, then
475/// matching continues.
476/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
477/// path is skipped.
478/// * Sixth, unless the path is a directory, the size of the file is compared
479/// against the max filesize limit. If it exceeds the limit, it is skipped.
480/// * Seventh, if the path has made it this far then it is yielded in the
481/// iterator.
482#[derive(Clone)]
483pub struct WalkBuilder {
484 paths: Vec<PathBuf>,
485 ig_builder: IgnoreBuilder,
486 max_depth: Option<usize>,
487 max_filesize: Option<u64>,
488 follow_links: bool,
489 same_file_system: bool,
490 sorter: Option<Sorter>,
491 threads: usize,
492 skip: Option<Arc<Handle>>,
493 filter: Option<Filter>,
494}
495
496#[derive(Clone)]
497enum Sorter {
498 ByName(Arc<dyn Fn(&OsStr, &OsStr) -> Ordering + Send + Sync + 'static>),
499 ByPath(Arc<dyn Fn(&Path, &Path) -> Ordering + Send + Sync + 'static>),
500}
501
502#[derive(Clone)]
503struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
504
505impl std::fmt::Debug for WalkBuilder {
506 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
507 f&mut DebugStruct<'_, '_>.debug_struct("WalkBuilder")
508 .field("paths", &self.paths)
509 .field("ig_builder", &self.ig_builder)
510 .field("max_depth", &self.max_depth)
511 .field("max_filesize", &self.max_filesize)
512 .field("follow_links", &self.follow_links)
513 .field("threads", &self.threads)
514 .field(name:"skip", &self.skip)
515 .finish()
516 }
517}
518
519impl WalkBuilder {
520 /// Create a new builder for a recursive directory iterator for the
521 /// directory given.
522 ///
523 /// Note that if you want to traverse multiple different directories, it
524 /// is better to call `add` on this builder than to create multiple
525 /// `Walk` values.
526 pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
527 WalkBuilder {
528 paths: vec![path.as_ref().to_path_buf()],
529 ig_builder: IgnoreBuilder::new(),
530 max_depth: None,
531 max_filesize: None,
532 follow_links: false,
533 same_file_system: false,
534 sorter: None,
535 threads: 0,
536 skip: None,
537 filter: None,
538 }
539 }
540
541 /// Build a new `Walk` iterator.
542 pub fn build(&self) -> Walk {
543 let follow_links = self.follow_links;
544 let max_depth = self.max_depth;
545 let sorter = self.sorter.clone();
546 let its = self
547 .paths
548 .iter()
549 .map(move |p| {
550 if p == Path::new("-") {
551 (p.to_path_buf(), None)
552 } else {
553 let mut wd = WalkDir::new(p);
554 wd = wd.follow_links(follow_links || p.is_file());
555 wd = wd.same_file_system(self.same_file_system);
556 if let Some(max_depth) = max_depth {
557 wd = wd.max_depth(max_depth);
558 }
559 if let Some(ref sorter) = sorter {
560 match sorter.clone() {
561 Sorter::ByName(cmp) => {
562 wd = wd.sort_by(move |a, b| {
563 cmp(a.file_name(), b.file_name())
564 });
565 }
566 Sorter::ByPath(cmp) => {
567 wd = wd.sort_by(move |a, b| {
568 cmp(a.path(), b.path())
569 });
570 }
571 }
572 }
573 (p.to_path_buf(), Some(WalkEventIter::from(wd)))
574 }
575 })
576 .collect::<Vec<_>>()
577 .into_iter();
578 let ig_root = self.ig_builder.build();
579 Walk {
580 its,
581 it: None,
582 ig_root: ig_root.clone(),
583 ig: ig_root.clone(),
584 max_filesize: self.max_filesize,
585 skip: self.skip.clone(),
586 filter: self.filter.clone(),
587 }
588 }
589
590 /// Build a new `WalkParallel` iterator.
591 ///
592 /// Note that this *doesn't* return something that implements `Iterator`.
593 /// Instead, the returned value must be run with a closure. e.g.,
594 /// `builder.build_parallel().run(|| |path| { println!("{path:?}"); WalkState::Continue })`.
595 pub fn build_parallel(&self) -> WalkParallel {
596 WalkParallel {
597 paths: self.paths.clone().into_iter(),
598 ig_root: self.ig_builder.build(),
599 max_depth: self.max_depth,
600 max_filesize: self.max_filesize,
601 follow_links: self.follow_links,
602 same_file_system: self.same_file_system,
603 threads: self.threads,
604 skip: self.skip.clone(),
605 filter: self.filter.clone(),
606 }
607 }
608
609 /// Add a file path to the iterator.
610 ///
611 /// Each additional file path added is traversed recursively. This should
612 /// be preferred over building multiple `Walk` iterators since this
613 /// enables reusing resources across iteration.
614 pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
615 self.paths.push(path.as_ref().to_path_buf());
616 self
617 }
618
619 /// The maximum depth to recurse.
620 ///
621 /// The default, `None`, imposes no depth restriction.
622 pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
623 self.max_depth = depth;
624 self
625 }
626
627 /// Whether to follow symbolic links or not.
628 pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
629 self.follow_links = yes;
630 self
631 }
632
633 /// Whether to ignore files above the specified limit.
634 pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
635 self.max_filesize = filesize;
636 self
637 }
638
639 /// The number of threads to use for traversal.
640 ///
641 /// Note that this only has an effect when using `build_parallel`.
642 ///
643 /// The default setting is `0`, which chooses the number of threads
644 /// automatically using heuristics.
645 pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
646 self.threads = n;
647 self
648 }
649
650 /// Add a global ignore file to the matcher.
651 ///
652 /// This has lower precedence than all other sources of ignore rules.
653 ///
654 /// If there was a problem adding the ignore file, then an error is
655 /// returned. Note that the error may indicate *partial* failure. For
656 /// example, if an ignore file contains an invalid glob, all other globs
657 /// are still applied.
658 pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
659 let mut builder = GitignoreBuilder::new("");
660 let mut errs = PartialErrorBuilder::default();
661 errs.maybe_push(builder.add(path));
662 match builder.build() {
663 Ok(gi) => {
664 self.ig_builder.add_ignore(gi);
665 }
666 Err(err) => {
667 errs.push(err);
668 }
669 }
670 errs.into_error_option()
671 }
672
673 /// Add a custom ignore file name
674 ///
675 /// These ignore files have higher precedence than all other ignore files.
676 ///
677 /// When specifying multiple names, earlier names have lower precedence than
678 /// later names.
679 pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
680 &mut self,
681 file_name: S,
682 ) -> &mut WalkBuilder {
683 self.ig_builder.add_custom_ignore_filename(file_name);
684 self
685 }
686
687 /// Add an override matcher.
688 ///
689 /// By default, no override matcher is used.
690 ///
691 /// This overrides any previous setting.
692 pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
693 self.ig_builder.overrides(overrides);
694 self
695 }
696
697 /// Add a file type matcher.
698 ///
699 /// By default, no file type matcher is used.
700 ///
701 /// This overrides any previous setting.
702 pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
703 self.ig_builder.types(types);
704 self
705 }
706
707 /// Enables all the standard ignore filters.
708 ///
709 /// This toggles, as a group, all the filters that are enabled by default:
710 ///
711 /// - [hidden()](#method.hidden)
712 /// - [parents()](#method.parents)
713 /// - [ignore()](#method.ignore)
714 /// - [git_ignore()](#method.git_ignore)
715 /// - [git_global()](#method.git_global)
716 /// - [git_exclude()](#method.git_exclude)
717 ///
718 /// They may still be toggled individually after calling this function.
719 ///
720 /// This is (by definition) enabled by default.
721 pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
722 self.hidden(yes)
723 .parents(yes)
724 .ignore(yes)
725 .git_ignore(yes)
726 .git_global(yes)
727 .git_exclude(yes)
728 }
729
730 /// Enables ignoring hidden files.
731 ///
732 /// This is enabled by default.
733 pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
734 self.ig_builder.hidden(yes);
735 self
736 }
737
738 /// Enables reading ignore files from parent directories.
739 ///
740 /// If this is enabled, then .gitignore files in parent directories of each
741 /// file path given are respected. Otherwise, they are ignored.
742 ///
743 /// This is enabled by default.
744 pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
745 self.ig_builder.parents(yes);
746 self
747 }
748
749 /// Enables reading `.ignore` files.
750 ///
751 /// `.ignore` files have the same semantics as `gitignore` files and are
752 /// supported by search tools such as ripgrep and The Silver Searcher.
753 ///
754 /// This is enabled by default.
755 pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
756 self.ig_builder.ignore(yes);
757 self
758 }
759
760 /// Enables reading a global gitignore file, whose path is specified in
761 /// git's `core.excludesFile` config option.
762 ///
763 /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
764 /// does not exist or does not specify `core.excludesFile`, then
765 /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
766 /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
767 ///
768 /// This is enabled by default.
769 pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
770 self.ig_builder.git_global(yes);
771 self
772 }
773
774 /// Enables reading `.gitignore` files.
775 ///
776 /// `.gitignore` files have match semantics as described in the `gitignore`
777 /// man page.
778 ///
779 /// This is enabled by default.
780 pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
781 self.ig_builder.git_ignore(yes);
782 self
783 }
784
785 /// Enables reading `.git/info/exclude` files.
786 ///
787 /// `.git/info/exclude` files have match semantics as described in the
788 /// `gitignore` man page.
789 ///
790 /// This is enabled by default.
791 pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
792 self.ig_builder.git_exclude(yes);
793 self
794 }
795
796 /// Whether a git repository is required to apply git-related ignore
797 /// rules (global rules, .gitignore and local exclude rules).
798 ///
799 /// When disabled, git-related ignore rules are applied even when searching
800 /// outside a git repository.
801 pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
802 self.ig_builder.require_git(yes);
803 self
804 }
805
806 /// Process ignore files case insensitively
807 ///
808 /// This is disabled by default.
809 pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
810 self.ig_builder.ignore_case_insensitive(yes);
811 self
812 }
813
814 /// Set a function for sorting directory entries by their path.
815 ///
816 /// If a compare function is set, the resulting iterator will return all
817 /// paths in sorted order. The compare function will be called to compare
818 /// entries from the same directory.
819 ///
820 /// This is like `sort_by_file_name`, except the comparator accepts
821 /// a `&Path` instead of the base file name, which permits it to sort by
822 /// more criteria.
823 ///
824 /// This method will override any previous sorter set by this method or
825 /// by `sort_by_file_name`.
826 ///
827 /// Note that this is not used in the parallel iterator.
828 pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
829 where
830 F: Fn(&Path, &Path) -> Ordering + Send + Sync + 'static,
831 {
832 self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
833 self
834 }
835
836 /// Set a function for sorting directory entries by file name.
837 ///
838 /// If a compare function is set, the resulting iterator will return all
839 /// paths in sorted order. The compare function will be called to compare
840 /// names from entries from the same directory using only the name of the
841 /// entry.
842 ///
843 /// This method will override any previous sorter set by this method or
844 /// by `sort_by_file_path`.
845 ///
846 /// Note that this is not used in the parallel iterator.
847 pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
848 where
849 F: Fn(&OsStr, &OsStr) -> Ordering + Send + Sync + 'static,
850 {
851 self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
852 self
853 }
854
855 /// Do not cross file system boundaries.
856 ///
857 /// When this option is enabled, directory traversal will not descend into
858 /// directories that are on a different file system from the root path.
859 ///
860 /// Currently, this option is only supported on Unix and Windows. If this
861 /// option is used on an unsupported platform, then directory traversal
862 /// will immediately return an error and will not yield any entries.
863 pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
864 self.same_file_system = yes;
865 self
866 }
867
868 /// Do not yield directory entries that are believed to correspond to
869 /// stdout.
870 ///
871 /// This is useful when a command is invoked via shell redirection to a
872 /// file that is also being read. For example, `grep -r foo ./ > results`
873 /// might end up trying to search `results` even though it is also writing
874 /// to it, which could cause an unbounded feedback loop. Setting this
875 /// option prevents this from happening by skipping over the `results`
876 /// file.
877 ///
878 /// This is disabled by default.
879 pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
880 if yes {
881 self.skip = stdout_handle().map(Arc::new);
882 } else {
883 self.skip = None;
884 }
885 self
886 }
887
888 /// Yields only entries which satisfy the given predicate and skips
889 /// descending into directories that do not satisfy the given predicate.
890 ///
891 /// The predicate is applied to all entries. If the predicate is
892 /// true, iteration carries on as normal. If the predicate is false, the
893 /// entry is ignored and if it is a directory, it is not descended into.
894 ///
895 /// Note that the errors for reading entries that may not satisfy the
896 /// predicate will still be yielded.
897 pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
898 where
899 P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
900 {
901 self.filter = Some(Filter(Arc::new(filter)));
902 self
903 }
904}
905
906/// Walk is a recursive directory iterator over file paths in one or more
907/// directories.
908///
909/// Only file and directory paths matching the rules are returned. By default,
910/// ignore files like `.gitignore` are respected. The precise matching rules
911/// and precedence is explained in the documentation for `WalkBuilder`.
912pub struct Walk {
913 its: std::vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
914 it: Option<WalkEventIter>,
915 ig_root: Ignore,
916 ig: Ignore,
917 max_filesize: Option<u64>,
918 skip: Option<Arc<Handle>>,
919 filter: Option<Filter>,
920}
921
922impl Walk {
923 /// Creates a new recursive directory iterator for the file path given.
924 ///
925 /// Note that this uses default settings, which include respecting
926 /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
927 /// instead.
928 pub fn new<P: AsRef<Path>>(path: P) -> Walk {
929 WalkBuilder::new(path).build()
930 }
931
932 fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
933 if ent.depth() == 0 {
934 return Ok(false);
935 }
936 // We ensure that trivial skipping is done before any other potentially
937 // expensive operations (stat, filesystem other) are done. This seems
938 // like an obvious optimization but becomes critical when filesystem
939 // operations even as simple as stat can result in significant
940 // overheads; an example of this was a bespoke filesystem layer in
941 // Windows that hosted files remotely and would download them on-demand
942 // when particular filesystem operations occurred. Users of this system
943 // who ensured correct file-type filters were being used could still
944 // get unnecessary file access resulting in large downloads.
945 if should_skip_entry(&self.ig, ent) {
946 return Ok(true);
947 }
948 if let Some(ref stdout) = self.skip {
949 if path_equals(ent, stdout)? {
950 return Ok(true);
951 }
952 }
953 if self.max_filesize.is_some() && !ent.is_dir() {
954 return Ok(skip_filesize(
955 self.max_filesize.unwrap(),
956 ent.path(),
957 &ent.metadata().ok(),
958 ));
959 }
960 if let Some(Filter(filter)) = &self.filter {
961 if !filter(ent) {
962 return Ok(true);
963 }
964 }
965 Ok(false)
966 }
967}
968
969impl Iterator for Walk {
970 type Item = Result<DirEntry, Error>;
971
972 #[inline(always)]
973 fn next(&mut self) -> Option<Result<DirEntry, Error>> {
974 loop {
975 let ev = match self.it.as_mut().and_then(|it| it.next()) {
976 Some(ev) => ev,
977 None => {
978 match self.its.next() {
979 None => return None,
980 Some((_, None)) => {
981 return Some(Ok(DirEntry::new_stdin()));
982 }
983 Some((path, Some(it))) => {
984 self.it = Some(it);
985 if path.is_dir() {
986 let (ig, err) = self.ig_root.add_parents(path);
987 self.ig = ig;
988 if let Some(err) = err {
989 return Some(Err(err));
990 }
991 } else {
992 self.ig = self.ig_root.clone();
993 }
994 }
995 }
996 continue;
997 }
998 };
999 match ev {
1000 Err(err) => {
1001 return Some(Err(Error::from_walkdir(err)));
1002 }
1003 Ok(WalkEvent::Exit) => {
1004 self.ig = self.ig.parent().unwrap();
1005 }
1006 Ok(WalkEvent::Dir(ent)) => {
1007 let mut ent = DirEntry::new_walkdir(ent, None);
1008 let should_skip = match self.skip_entry(&ent) {
1009 Err(err) => return Some(Err(err)),
1010 Ok(should_skip) => should_skip,
1011 };
1012 if should_skip {
1013 self.it.as_mut().unwrap().it.skip_current_dir();
1014 // Still need to push this on the stack because
1015 // we'll get a WalkEvent::Exit event for this dir.
1016 // We don't care if it errors though.
1017 let (igtmp, _) = self.ig.add_child(ent.path());
1018 self.ig = igtmp;
1019 continue;
1020 }
1021 let (igtmp, err) = self.ig.add_child(ent.path());
1022 self.ig = igtmp;
1023 ent.err = err;
1024 return Some(Ok(ent));
1025 }
1026 Ok(WalkEvent::File(ent)) => {
1027 let ent = DirEntry::new_walkdir(ent, None);
1028 let should_skip = match self.skip_entry(&ent) {
1029 Err(err) => return Some(Err(err)),
1030 Ok(should_skip) => should_skip,
1031 };
1032 if should_skip {
1033 continue;
1034 }
1035 return Some(Ok(ent));
1036 }
1037 }
1038 }
1039 }
1040}
1041
1042impl std::iter::FusedIterator for Walk {}
1043
1044/// WalkEventIter transforms a WalkDir iterator into an iterator that more
1045/// accurately describes the directory tree. Namely, it emits events that are
1046/// one of three types: directory, file or "exit." An "exit" event means that
1047/// the entire contents of a directory have been enumerated.
1048struct WalkEventIter {
1049 depth: usize,
1050 it: walkdir::IntoIter,
1051 next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
1052}
1053
1054#[derive(Debug)]
1055enum WalkEvent {
1056 Dir(walkdir::DirEntry),
1057 File(walkdir::DirEntry),
1058 Exit,
1059}
1060
1061impl From<WalkDir> for WalkEventIter {
1062 fn from(it: WalkDir) -> WalkEventIter {
1063 WalkEventIter { depth: 0, it: it.into_iter(), next: None }
1064 }
1065}
1066
1067impl Iterator for WalkEventIter {
1068 type Item = walkdir::Result<WalkEvent>;
1069
1070 #[inline(always)]
1071 fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1072 let dent = self.next.take().or_else(|| self.it.next());
1073 let depth = match dent {
1074 None => 0,
1075 Some(Ok(ref dent)) => dent.depth(),
1076 Some(Err(ref err)) => err.depth(),
1077 };
1078 if depth < self.depth {
1079 self.depth -= 1;
1080 self.next = dent;
1081 return Some(Ok(WalkEvent::Exit));
1082 }
1083 self.depth = depth;
1084 match dent {
1085 None => None,
1086 Some(Err(err)) => Some(Err(err)),
1087 Some(Ok(dent)) => {
1088 if walkdir_is_dir(&dent) {
1089 self.depth += 1;
1090 Some(Ok(WalkEvent::Dir(dent)))
1091 } else {
1092 Some(Ok(WalkEvent::File(dent)))
1093 }
1094 }
1095 }
1096 }
1097}
1098
1099/// WalkState is used in the parallel recursive directory iterator to indicate
1100/// whether walking should continue as normal, skip descending into a
1101/// particular directory or quit the walk entirely.
1102#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1103pub enum WalkState {
1104 /// Continue walking as normal.
1105 Continue,
1106 /// If the directory entry given is a directory, don't descend into it.
1107 /// In all other cases, this has no effect.
1108 Skip,
1109 /// Quit the entire iterator as soon as possible.
1110 ///
1111 /// Note that this is an inherently asynchronous action. It is possible
1112 /// for more entries to be yielded even after instructing the iterator
1113 /// to quit.
1114 Quit,
1115}
1116
1117impl WalkState {
1118 fn is_continue(&self) -> bool {
1119 *self == WalkState::Continue
1120 }
1121
1122 fn is_quit(&self) -> bool {
1123 *self == WalkState::Quit
1124 }
1125}
1126
1127/// A builder for constructing a visitor when using [`WalkParallel::visit`].
1128/// The builder will be called for each thread started by `WalkParallel`. The
1129/// visitor returned from each builder is then called for every directory
1130/// entry.
1131pub trait ParallelVisitorBuilder<'s> {
1132 /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
1133 fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
1134}
1135
1136impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
1137 for &'a mut P
1138{
1139 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1140 (**self).build()
1141 }
1142}
1143
1144/// Receives files and directories for the current thread.
1145///
1146/// Setup for the traversal can be implemented as part of
1147/// [`ParallelVisitorBuilder::build`]. Teardown when traversal finishes can be
1148/// implemented by implementing the `Drop` trait on your traversal type.
1149pub trait ParallelVisitor: Send {
1150 /// Receives files and directories for the current thread. This is called
1151 /// once for every directory entry visited by traversal.
1152 fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
1153}
1154
1155struct FnBuilder<F> {
1156 builder: F,
1157}
1158
1159impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
1160 for FnBuilder<F>
1161{
1162 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1163 let visitor: Box) -> … + Send + 's> = (self.builder)();
1164 Box::new(FnVisitorImp { visitor })
1165 }
1166}
1167
1168type FnVisitor<'s> =
1169 Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
1170
1171struct FnVisitorImp<'s> {
1172 visitor: FnVisitor<'s>,
1173}
1174
1175impl<'s> ParallelVisitor for FnVisitorImp<'s> {
1176 fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
1177 (self.visitor)(entry)
1178 }
1179}
1180
1181/// WalkParallel is a parallel recursive directory iterator over files paths
1182/// in one or more directories.
1183///
1184/// Only file and directory paths matching the rules are returned. By default,
1185/// ignore files like `.gitignore` are respected. The precise matching rules
1186/// and precedence is explained in the documentation for `WalkBuilder`.
1187///
1188/// Unlike `Walk`, this uses multiple threads for traversing a directory.
1189pub struct WalkParallel {
1190 paths: std::vec::IntoIter<PathBuf>,
1191 ig_root: Ignore,
1192 max_filesize: Option<u64>,
1193 max_depth: Option<usize>,
1194 follow_links: bool,
1195 same_file_system: bool,
1196 threads: usize,
1197 skip: Option<Arc<Handle>>,
1198 filter: Option<Filter>,
1199}
1200
1201impl WalkParallel {
1202 /// Execute the parallel recursive directory iterator. `mkf` is called
1203 /// for each thread used for iteration. The function produced by `mkf`
1204 /// is then in turn called for each visited file path.
1205 pub fn run<'s, F>(self, mkf: F)
1206 where
1207 F: FnMut() -> FnVisitor<'s>,
1208 {
1209 self.visit(&mut FnBuilder { builder: mkf })
1210 }
1211
1212 /// Execute the parallel recursive directory iterator using a custom
1213 /// visitor.
1214 ///
1215 /// The builder given is used to construct a visitor for every thread
1216 /// used by this traversal. The visitor returned from each builder is then
1217 /// called for every directory entry seen by that thread.
1218 ///
1219 /// Typically, creating a custom visitor is useful if you need to perform
1220 /// some kind of cleanup once traversal is finished. This can be achieved
1221 /// by implementing `Drop` for your builder (or for your visitor, if you
1222 /// want to execute cleanup for every thread that is launched).
1223 ///
1224 /// For example, each visitor might build up a data structure of results
1225 /// corresponding to the directory entries seen for each thread. Since each
1226 /// visitor runs on only one thread, this build-up can be done without
1227 /// synchronization. Then, once traversal is complete, all of the results
1228 /// can be merged together into a single data structure.
1229 pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
1230 let threads = self.threads();
1231 let mut stack = vec![];
1232 {
1233 let mut visitor = builder.build();
1234 let mut paths = Vec::new().into_iter();
1235 std::mem::swap(&mut paths, &mut self.paths);
1236 // Send the initial set of root paths to the pool of workers. Note
1237 // that we only send directories. For files, we send to them the
1238 // callback directly.
1239 for path in paths {
1240 let (dent, root_device) = if path == Path::new("-") {
1241 (DirEntry::new_stdin(), None)
1242 } else {
1243 let root_device = if !self.same_file_system {
1244 None
1245 } else {
1246 match device_num(&path) {
1247 Ok(root_device) => Some(root_device),
1248 Err(err) => {
1249 let err = Error::Io(err).with_path(path);
1250 if visitor.visit(Err(err)).is_quit() {
1251 return;
1252 }
1253 continue;
1254 }
1255 }
1256 };
1257 match DirEntryRaw::from_path(0, path, false) {
1258 Ok(dent) => {
1259 (DirEntry::new_raw(dent, None), root_device)
1260 }
1261 Err(err) => {
1262 if visitor.visit(Err(err)).is_quit() {
1263 return;
1264 }
1265 continue;
1266 }
1267 }
1268 };
1269 stack.push(Message::Work(Work {
1270 dent,
1271 ignore: self.ig_root.clone(),
1272 root_device,
1273 }));
1274 }
1275 // ... but there's no need to start workers if we don't need them.
1276 if stack.is_empty() {
1277 return;
1278 }
1279 }
1280 // Create the workers and then wait for them to finish.
1281 let quit_now = Arc::new(AtomicBool::new(false));
1282 let active_workers = Arc::new(AtomicUsize::new(threads));
1283 let stacks = Stack::new_for_each_thread(threads, stack);
1284 std::thread::scope(|s| {
1285 let handles: Vec<_> = stacks
1286 .into_iter()
1287 .map(|stack| Worker {
1288 visitor: builder.build(),
1289 stack,
1290 quit_now: quit_now.clone(),
1291 active_workers: active_workers.clone(),
1292 max_depth: self.max_depth,
1293 max_filesize: self.max_filesize,
1294 follow_links: self.follow_links,
1295 skip: self.skip.clone(),
1296 filter: self.filter.clone(),
1297 })
1298 .map(|worker| s.spawn(|| worker.run()))
1299 .collect();
1300 for handle in handles {
1301 handle.join().unwrap();
1302 }
1303 });
1304 }
1305
1306 fn threads(&self) -> usize {
1307 if self.threads == 0 {
1308 2
1309 } else {
1310 self.threads
1311 }
1312 }
1313}
1314
1315/// Message is the set of instructions that a worker knows how to process.
1316enum Message {
1317 /// A work item corresponds to a directory that should be descended into.
1318 /// Work items for entries that should be skipped or ignored should not
1319 /// be produced.
1320 Work(Work),
1321 /// This instruction indicates that the worker should quit.
1322 Quit,
1323}
1324
1325/// A unit of work for each worker to process.
1326///
1327/// Each unit of work corresponds to a directory that should be descended
1328/// into.
1329struct Work {
1330 /// The directory entry.
1331 dent: DirEntry,
1332 /// Any ignore matchers that have been built for this directory's parents.
1333 ignore: Ignore,
1334 /// The root device number. When present, only files with the same device
1335 /// number should be considered.
1336 root_device: Option<u64>,
1337}
1338
1339impl Work {
1340 /// Returns true if and only if this work item is a directory.
1341 fn is_dir(&self) -> bool {
1342 self.dent.is_dir()
1343 }
1344
1345 /// Returns true if and only if this work item is a symlink.
1346 fn is_symlink(&self) -> bool {
1347 self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1348 }
1349
1350 /// Adds ignore rules for parent directories.
1351 ///
1352 /// Note that this only applies to entries at depth 0. On all other
1353 /// entries, this is a no-op.
1354 fn add_parents(&mut self) -> Option<Error> {
1355 if self.dent.depth() > 0 {
1356 return None;
1357 }
1358 // At depth 0, the path of this entry is a root path, so we can
1359 // use it directly to add parent ignore rules.
1360 let (ig, err) = self.ignore.add_parents(self.dent.path());
1361 self.ignore = ig;
1362 err
1363 }
1364
1365 /// Reads the directory contents of this work item and adds ignore
1366 /// rules for this directory.
1367 ///
1368 /// If there was a problem with reading the directory contents, then
1369 /// an error is returned. If there was a problem reading the ignore
1370 /// rules for this directory, then the error is attached to this
1371 /// work item's directory entry.
1372 fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1373 let readdir = match fs::read_dir(self.dent.path()) {
1374 Ok(readdir) => readdir,
1375 Err(err) => {
1376 let err = Error::from(err)
1377 .with_path(self.dent.path())
1378 .with_depth(self.dent.depth());
1379 return Err(err);
1380 }
1381 };
1382 let (ig, err) = self.ignore.add_child(self.dent.path());
1383 self.ignore = ig;
1384 self.dent.err = err;
1385 Ok(readdir)
1386 }
1387}
1388
1389/// A work-stealing stack.
1390#[derive(Debug)]
1391struct Stack {
1392 /// This thread's index.
1393 index: usize,
1394 /// The thread-local stack.
1395 deque: Deque<Message>,
1396 /// The work stealers.
1397 stealers: Arc<[Stealer<Message>]>,
1398}
1399
1400impl Stack {
1401 /// Create a work-stealing stack for each thread. The given messages
1402 /// correspond to the initial paths to start the search at. They will
1403 /// be distributed automatically to each stack in a round-robin fashion.
1404 fn new_for_each_thread(threads: usize, init: Vec<Message>) -> Vec<Stack> {
1405 // Using new_lifo() ensures each worker operates depth-first, not
1406 // breadth-first. We do depth-first because a breadth first traversal
1407 // on wide directories with a lot of gitignores is disastrous (for
1408 // example, searching a directory tree containing all of crates.io).
1409 let deques: Vec<Deque<Message>> =
1410 std::iter::repeat_with(Deque::new_lifo).take(threads).collect();
1411 let stealers = Arc::<[Stealer<Message>]>::from(
1412 deques.iter().map(Deque::stealer).collect::<Vec<_>>(),
1413 );
1414 let stacks: Vec<Stack> = deques
1415 .into_iter()
1416 .enumerate()
1417 .map(|(index, deque)| Stack {
1418 index,
1419 deque,
1420 stealers: stealers.clone(),
1421 })
1422 .collect();
1423 // Distribute the initial messages.
1424 init.into_iter()
1425 .zip(stacks.iter().cycle())
1426 .for_each(|(m, s)| s.push(m));
1427 stacks
1428 }
1429
1430 /// Push a message.
1431 fn push(&self, msg: Message) {
1432 self.deque.push(msg);
1433 }
1434
1435 /// Pop a message.
1436 fn pop(&self) -> Option<Message> {
1437 self.deque.pop().or_else(|| self.steal())
1438 }
1439
1440 /// Steal a message from another queue.
1441 fn steal(&self) -> Option<Message> {
1442 // For fairness, try to steal from index + 1, index + 2, ... len - 1,
1443 // then wrap around to 0, 1, ... index - 1.
1444 let (left, right) = self.stealers.split_at(self.index);
1445 // Don't steal from ourselves
1446 let right = &right[1..];
1447
1448 right
1449 .iter()
1450 .chain(left.iter())
1451 .map(|s| s.steal_batch_and_pop(&self.deque))
1452 .find_map(|s| s.success())
1453 }
1454}
1455
1456/// A worker is responsible for descending into directories, updating the
1457/// ignore matchers, producing new work and invoking the caller's callback.
1458///
1459/// Note that a worker is *both* a producer and a consumer.
1460struct Worker<'s> {
1461 /// The caller's callback.
1462 visitor: Box<dyn ParallelVisitor + 's>,
1463 /// A work-stealing stack of work to do.
1464 ///
1465 /// We use a stack instead of a channel because a stack lets us visit
1466 /// directories in depth first order. This can substantially reduce peak
1467 /// memory usage by keeping both the number of file paths and gitignore
1468 /// matchers in memory lower.
1469 stack: Stack,
1470 /// Whether all workers should terminate at the next opportunity. Note
1471 /// that we need this because we don't want other `Work` to be done after
1472 /// we quit. We wouldn't need this if have a priority channel.
1473 quit_now: Arc<AtomicBool>,
1474 /// The number of currently active workers.
1475 active_workers: Arc<AtomicUsize>,
1476 /// The maximum depth of directories to descend. A value of `0` means no
1477 /// descension at all.
1478 max_depth: Option<usize>,
1479 /// The maximum size a searched file can be (in bytes). If a file exceeds
1480 /// this size it will be skipped.
1481 max_filesize: Option<u64>,
1482 /// Whether to follow symbolic links or not. When this is enabled, loop
1483 /// detection is performed.
1484 follow_links: bool,
1485 /// A file handle to skip, currently is either `None` or stdout, if it's
1486 /// a file and it has been requested to skip files identical to stdout.
1487 skip: Option<Arc<Handle>>,
1488 /// A predicate applied to dir entries. If true, the entry and all
1489 /// children will be skipped.
1490 filter: Option<Filter>,
1491}
1492
1493impl<'s> Worker<'s> {
1494 /// Runs this worker until there is no more work left to do.
1495 ///
1496 /// The worker will call the caller's callback for all entries that aren't
1497 /// skipped by the ignore matcher.
1498 fn run(mut self) {
1499 while let Some(work) = self.get_work() {
1500 if let WalkState::Quit = self.run_one(work) {
1501 self.quit_now();
1502 }
1503 }
1504 }
1505
1506 fn run_one(&mut self, mut work: Work) -> WalkState {
1507 // If the work is not a directory, then we can just execute the
1508 // caller's callback immediately and move on.
1509 if work.is_symlink() || !work.is_dir() {
1510 return self.visitor.visit(Ok(work.dent));
1511 }
1512 if let Some(err) = work.add_parents() {
1513 let state = self.visitor.visit(Err(err));
1514 if state.is_quit() {
1515 return state;
1516 }
1517 }
1518
1519 let descend = if let Some(root_device) = work.root_device {
1520 match is_same_file_system(root_device, work.dent.path()) {
1521 Ok(true) => true,
1522 Ok(false) => false,
1523 Err(err) => {
1524 let state = self.visitor.visit(Err(err));
1525 if state.is_quit() {
1526 return state;
1527 }
1528 false
1529 }
1530 }
1531 } else {
1532 true
1533 };
1534
1535 // Try to read the directory first before we transfer ownership
1536 // to the provided closure. Do not unwrap it immediately, though,
1537 // as we may receive an `Err` value e.g. in the case when we do not
1538 // have sufficient read permissions to list the directory.
1539 // In that case we still want to provide the closure with a valid
1540 // entry before passing the error value.
1541 let readdir = work.read_dir();
1542 let depth = work.dent.depth();
1543 let state = self.visitor.visit(Ok(work.dent));
1544 if !state.is_continue() {
1545 return state;
1546 }
1547 if !descend {
1548 return WalkState::Skip;
1549 }
1550
1551 let readdir = match readdir {
1552 Ok(readdir) => readdir,
1553 Err(err) => {
1554 return self.visitor.visit(Err(err));
1555 }
1556 };
1557
1558 if self.max_depth.map_or(false, |max| depth >= max) {
1559 return WalkState::Skip;
1560 }
1561 for result in readdir {
1562 let state = self.generate_work(
1563 &work.ignore,
1564 depth + 1,
1565 work.root_device,
1566 result,
1567 );
1568 if state.is_quit() {
1569 return state;
1570 }
1571 }
1572 WalkState::Continue
1573 }
1574
1575 /// Decides whether to submit the given directory entry as a file to
1576 /// search.
1577 ///
1578 /// If the entry is a path that should be ignored, then this is a no-op.
1579 /// Otherwise, the entry is pushed on to the queue. (The actual execution
1580 /// of the callback happens in `run_one`.)
1581 ///
1582 /// If an error occurs while reading the entry, then it is sent to the
1583 /// caller's callback.
1584 ///
1585 /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
1586 /// be the depth of this entry. `result` should be the item yielded by
1587 /// a directory iterator.
1588 fn generate_work(
1589 &mut self,
1590 ig: &Ignore,
1591 depth: usize,
1592 root_device: Option<u64>,
1593 result: Result<fs::DirEntry, io::Error>,
1594 ) -> WalkState {
1595 let fs_dent = match result {
1596 Ok(fs_dent) => fs_dent,
1597 Err(err) => {
1598 return self
1599 .visitor
1600 .visit(Err(Error::from(err).with_depth(depth)));
1601 }
1602 };
1603 let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
1604 Ok(dent) => DirEntry::new_raw(dent, None),
1605 Err(err) => {
1606 return self.visitor.visit(Err(err));
1607 }
1608 };
1609 let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
1610 if self.follow_links && is_symlink {
1611 let path = dent.path().to_path_buf();
1612 dent = match DirEntryRaw::from_path(depth, path, true) {
1613 Ok(dent) => DirEntry::new_raw(dent, None),
1614 Err(err) => {
1615 return self.visitor.visit(Err(err));
1616 }
1617 };
1618 if dent.is_dir() {
1619 if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
1620 return self.visitor.visit(Err(err));
1621 }
1622 }
1623 }
1624 // N.B. See analogous call in the single-threaded implementation about
1625 // why it's important for this to come before the checks below.
1626 if should_skip_entry(ig, &dent) {
1627 return WalkState::Continue;
1628 }
1629 if let Some(ref stdout) = self.skip {
1630 let is_stdout = match path_equals(&dent, stdout) {
1631 Ok(is_stdout) => is_stdout,
1632 Err(err) => return self.visitor.visit(Err(err)),
1633 };
1634 if is_stdout {
1635 return WalkState::Continue;
1636 }
1637 }
1638 let should_skip_filesize =
1639 if self.max_filesize.is_some() && !dent.is_dir() {
1640 skip_filesize(
1641 self.max_filesize.unwrap(),
1642 dent.path(),
1643 &dent.metadata().ok(),
1644 )
1645 } else {
1646 false
1647 };
1648 let should_skip_filtered =
1649 if let Some(Filter(predicate)) = &self.filter {
1650 !predicate(&dent)
1651 } else {
1652 false
1653 };
1654 if !should_skip_filesize && !should_skip_filtered {
1655 self.send(Work { dent, ignore: ig.clone(), root_device });
1656 }
1657 WalkState::Continue
1658 }
1659
1660 /// Returns the next directory to descend into.
1661 ///
1662 /// If all work has been exhausted, then this returns None. The worker
1663 /// should then subsequently quit.
1664 fn get_work(&mut self) -> Option<Work> {
1665 let mut value = self.recv();
1666 loop {
1667 // Simulate a priority channel: If quit_now flag is set, we can
1668 // receive only quit messages.
1669 if self.is_quit_now() {
1670 value = Some(Message::Quit)
1671 }
1672 match value {
1673 Some(Message::Work(work)) => {
1674 return Some(work);
1675 }
1676 Some(Message::Quit) => {
1677 // Repeat quit message to wake up sleeping threads, if
1678 // any. The domino effect will ensure that every thread
1679 // will quit.
1680 self.send_quit();
1681 return None;
1682 }
1683 None => {
1684 if self.deactivate_worker() == 0 {
1685 // If deactivate_worker() returns 0, every worker thread
1686 // is currently within the critical section between the
1687 // acquire in deactivate_worker() and the release in
1688 // activate_worker() below. For this to happen, every
1689 // worker's local deque must be simultaneously empty,
1690 // meaning there is no more work left at all.
1691 self.send_quit();
1692 return None;
1693 }
1694 // Wait for next `Work` or `Quit` message.
1695 loop {
1696 if let Some(v) = self.recv() {
1697 self.activate_worker();
1698 value = Some(v);
1699 break;
1700 }
1701 // Our stack isn't blocking. Instead of burning the
1702 // CPU waiting, we let the thread sleep for a bit. In
1703 // general, this tends to only occur once the search is
1704 // approaching termination.
1705 let dur = std::time::Duration::from_millis(1);
1706 std::thread::sleep(dur);
1707 }
1708 }
1709 }
1710 }
1711 }
1712
1713 /// Indicates that all workers should quit immediately.
1714 fn quit_now(&self) {
1715 self.quit_now.store(true, AtomicOrdering::SeqCst);
1716 }
1717
1718 /// Returns true if this worker should quit immediately.
1719 fn is_quit_now(&self) -> bool {
1720 self.quit_now.load(AtomicOrdering::SeqCst)
1721 }
1722
1723 /// Send work.
1724 fn send(&self, work: Work) {
1725 self.stack.push(Message::Work(work));
1726 }
1727
1728 /// Send a quit message.
1729 fn send_quit(&self) {
1730 self.stack.push(Message::Quit);
1731 }
1732
1733 /// Receive work.
1734 fn recv(&self) -> Option<Message> {
1735 self.stack.pop()
1736 }
1737
1738 /// Deactivates a worker and returns the number of currently active workers.
1739 fn deactivate_worker(&self) -> usize {
1740 self.active_workers.fetch_sub(1, AtomicOrdering::Acquire) - 1
1741 }
1742
1743 /// Reactivates a worker.
1744 fn activate_worker(&self) {
1745 self.active_workers.fetch_add(1, AtomicOrdering::Release);
1746 }
1747}
1748
1749fn check_symlink_loop(
1750 ig_parent: &Ignore,
1751 child_path: &Path,
1752 child_depth: usize,
1753) -> Result<(), Error> {
1754 let hchild: Handle = Handle::from_path(child_path).map_err(|err: Error| {
1755 Error::from(err).with_path(child_path).with_depth(child_depth)
1756 })?;
1757 for ig: &Ignore in ig_parent.parents().take_while(|ig: &&Ignore| !ig.is_absolute_parent()) {
1758 let h: Handle = Handle::from_path(ig.path()).map_err(|err: Error| {
1759 Error::from(err).with_path(child_path).with_depth(child_depth)
1760 })?;
1761 if hchild == h {
1762 return Err(Error::Loop {
1763 ancestor: ig.path().to_path_buf(),
1764 child: child_path.to_path_buf(),
1765 }
1766 .with_depth(child_depth));
1767 }
1768 }
1769 Ok(())
1770}
1771
1772// Before calling this function, make sure that you ensure that is really
1773// necessary as the arguments imply a file stat.
1774fn skip_filesize(
1775 max_filesize: u64,
1776 path: &Path,
1777 ent: &Option<Metadata>,
1778) -> bool {
1779 let filesize: Option = match *ent {
1780 Some(ref md: &Metadata) => Some(md.len()),
1781 None => None,
1782 };
1783
1784 if let Some(fs: u64) = filesize {
1785 if fs > max_filesize {
1786 log::debug!("ignoring {}: {} bytes", path.display(), fs);
1787 true
1788 } else {
1789 false
1790 }
1791 } else {
1792 false
1793 }
1794}
1795
1796fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1797 let m: Match> = ig.matched_dir_entry(dent);
1798 if m.is_ignore() {
1799 log::debug!("ignoring {}: {:?}", dent.path().display(), m);
1800 true
1801 } else if m.is_whitelist() {
1802 log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
1803 false
1804 } else {
1805 false
1806 }
1807}
1808
1809/// Returns a handle to stdout for filtering search.
1810///
1811/// A handle is returned if and only if stdout is being redirected to a file.
1812/// The handle returned corresponds to that file.
1813///
1814/// This can be used to ensure that we do not attempt to search a file that we
1815/// may also be writing to.
1816fn stdout_handle() -> Option<Handle> {
1817 let h: Handle = match Handle::stdout() {
1818 Err(_) => return None,
1819 Ok(h: Handle) => h,
1820 };
1821 let md: Metadata = match h.as_file().metadata() {
1822 Err(_) => return None,
1823 Ok(md: Metadata) => md,
1824 };
1825 if !md.is_file() {
1826 return None;
1827 }
1828 Some(h)
1829}
1830
1831/// Returns true if and only if the given directory entry is believed to be
1832/// equivalent to the given handle. If there was a problem querying the path
1833/// for information to determine equality, then that error is returned.
1834fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1835 #[cfg(unix)]
1836 fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1837 dent.ino() != Some(handle.ino())
1838 }
1839
1840 #[cfg(not(unix))]
1841 fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1842 false
1843 }
1844
1845 // If we know for sure that these two things aren't equal, then avoid
1846 // the costly extra stat call to determine equality.
1847 if dent.is_stdin() || never_equal(dent, handle) {
1848 return Ok(false);
1849 }
1850 Handle::from_path(dent.path())
1851 .map(|h| &h == handle)
1852 .map_err(|err: Error| Error::Io(err).with_path(dent.path()))
1853}
1854
1855/// Returns true if the given walkdir entry corresponds to a directory.
1856///
1857/// This is normally just `dent.file_type().is_dir()`, but when we aren't
1858/// following symlinks, the root directory entry may be a symlink to a
1859/// directory that we *do* follow---by virtue of it being specified by the user
1860/// explicitly. In that case, we need to follow the symlink and query whether
1861/// it's a directory or not. But we only do this for root entries to avoid an
1862/// additional stat check in most cases.
1863fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
1864 if dent.file_type().is_dir() {
1865 return true;
1866 }
1867 if !dent.file_type().is_symlink() || dent.depth() > 0 {
1868 return false;
1869 }
1870 dent.path().metadata().ok().map_or(default:false, |md: Metadata| md.file_type().is_dir())
1871}
1872
1873/// Returns true if and only if the given path is on the same device as the
1874/// given root device.
1875fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
1876 let dent_device: u64 =
1877 device_num(path).map_err(|err: Error| Error::Io(err).with_path(path))?;
1878 Ok(root_device == dent_device)
1879}
1880
1881#[cfg(unix)]
1882fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1883 use std::os::unix::fs::MetadataExt;
1884
1885 path.as_ref().metadata().map(|md: Metadata| md.dev())
1886}
1887
1888#[cfg(windows)]
1889fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1890 use winapi_util::{file, Handle};
1891
1892 let h = Handle::from_path_any(path)?;
1893 file::information(h).map(|info| info.volume_serial_number())
1894}
1895
1896#[cfg(not(any(unix, windows)))]
1897fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
1898 Err(io::Error::new(
1899 io::ErrorKind::Other,
1900 "walkdir: same_file_system option not supported on this platform",
1901 ))
1902}
1903
1904#[cfg(test)]
1905mod tests {
1906 use std::ffi::OsStr;
1907 use std::fs::{self, File};
1908 use std::io::Write;
1909 use std::path::Path;
1910 use std::sync::{Arc, Mutex};
1911
1912 use super::{DirEntry, WalkBuilder, WalkState};
1913 use crate::tests::TempDir;
1914
1915 fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
1916 let mut file = File::create(path).unwrap();
1917 file.write_all(contents.as_bytes()).unwrap();
1918 }
1919
1920 fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
1921 let file = File::create(path).unwrap();
1922 file.set_len(size).unwrap();
1923 }
1924
1925 #[cfg(unix)]
1926 fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
1927 use std::os::unix::fs::symlink;
1928 symlink(src, dst).unwrap();
1929 }
1930
1931 fn mkdirp<P: AsRef<Path>>(path: P) {
1932 fs::create_dir_all(path).unwrap();
1933 }
1934
1935 fn normal_path(unix: &str) -> String {
1936 if cfg!(windows) {
1937 unix.replace("\\", "/")
1938 } else {
1939 unix.to_string()
1940 }
1941 }
1942
1943 fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
1944 let mut paths = vec![];
1945 for result in builder.build() {
1946 let dent = match result {
1947 Err(_) => continue,
1948 Ok(dent) => dent,
1949 };
1950 let path = dent.path().strip_prefix(prefix).unwrap();
1951 if path.as_os_str().is_empty() {
1952 continue;
1953 }
1954 paths.push(normal_path(path.to_str().unwrap()));
1955 }
1956 paths.sort();
1957 paths
1958 }
1959
1960 fn walk_collect_parallel(
1961 prefix: &Path,
1962 builder: &WalkBuilder,
1963 ) -> Vec<String> {
1964 let mut paths = vec![];
1965 for dent in walk_collect_entries_parallel(builder) {
1966 let path = dent.path().strip_prefix(prefix).unwrap();
1967 if path.as_os_str().is_empty() {
1968 continue;
1969 }
1970 paths.push(normal_path(path.to_str().unwrap()));
1971 }
1972 paths.sort();
1973 paths
1974 }
1975
1976 fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
1977 let dents = Arc::new(Mutex::new(vec![]));
1978 builder.build_parallel().run(|| {
1979 let dents = dents.clone();
1980 Box::new(move |result| {
1981 if let Ok(dent) = result {
1982 dents.lock().unwrap().push(dent);
1983 }
1984 WalkState::Continue
1985 })
1986 });
1987
1988 let dents = dents.lock().unwrap();
1989 dents.to_vec()
1990 }
1991
1992 fn mkpaths(paths: &[&str]) -> Vec<String> {
1993 let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
1994 paths.sort();
1995 paths
1996 }
1997
1998 fn tmpdir() -> TempDir {
1999 TempDir::new().unwrap()
2000 }
2001
2002 fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
2003 let got = walk_collect(prefix, builder);
2004 assert_eq!(got, mkpaths(expected), "single threaded");
2005 let got = walk_collect_parallel(prefix, builder);
2006 assert_eq!(got, mkpaths(expected), "parallel");
2007 }
2008
2009 #[test]
2010 fn no_ignores() {
2011 let td = tmpdir();
2012 mkdirp(td.path().join("a/b/c"));
2013 mkdirp(td.path().join("x/y"));
2014 wfile(td.path().join("a/b/foo"), "");
2015 wfile(td.path().join("x/y/foo"), "");
2016
2017 assert_paths(
2018 td.path(),
2019 &WalkBuilder::new(td.path()),
2020 &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
2021 );
2022 }
2023
2024 #[test]
2025 fn custom_ignore() {
2026 let td = tmpdir();
2027 let custom_ignore = ".customignore";
2028 mkdirp(td.path().join("a"));
2029 wfile(td.path().join(custom_ignore), "foo");
2030 wfile(td.path().join("foo"), "");
2031 wfile(td.path().join("a/foo"), "");
2032 wfile(td.path().join("bar"), "");
2033 wfile(td.path().join("a/bar"), "");
2034
2035 let mut builder = WalkBuilder::new(td.path());
2036 builder.add_custom_ignore_filename(&custom_ignore);
2037 assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2038 }
2039
2040 #[test]
2041 fn custom_ignore_exclusive_use() {
2042 let td = tmpdir();
2043 let custom_ignore = ".customignore";
2044 mkdirp(td.path().join("a"));
2045 wfile(td.path().join(custom_ignore), "foo");
2046 wfile(td.path().join("foo"), "");
2047 wfile(td.path().join("a/foo"), "");
2048 wfile(td.path().join("bar"), "");
2049 wfile(td.path().join("a/bar"), "");
2050
2051 let mut builder = WalkBuilder::new(td.path());
2052 builder.ignore(false);
2053 builder.git_ignore(false);
2054 builder.git_global(false);
2055 builder.git_exclude(false);
2056 builder.add_custom_ignore_filename(&custom_ignore);
2057 assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2058 }
2059
2060 #[test]
2061 fn gitignore() {
2062 let td = tmpdir();
2063 mkdirp(td.path().join(".git"));
2064 mkdirp(td.path().join("a"));
2065 wfile(td.path().join(".gitignore"), "foo");
2066 wfile(td.path().join("foo"), "");
2067 wfile(td.path().join("a/foo"), "");
2068 wfile(td.path().join("bar"), "");
2069 wfile(td.path().join("a/bar"), "");
2070
2071 assert_paths(
2072 td.path(),
2073 &WalkBuilder::new(td.path()),
2074 &["bar", "a", "a/bar"],
2075 );
2076 }
2077
2078 #[test]
2079 fn explicit_ignore() {
2080 let td = tmpdir();
2081 let igpath = td.path().join(".not-an-ignore");
2082 mkdirp(td.path().join("a"));
2083 wfile(&igpath, "foo");
2084 wfile(td.path().join("foo"), "");
2085 wfile(td.path().join("a/foo"), "");
2086 wfile(td.path().join("bar"), "");
2087 wfile(td.path().join("a/bar"), "");
2088
2089 let mut builder = WalkBuilder::new(td.path());
2090 assert!(builder.add_ignore(&igpath).is_none());
2091 assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2092 }
2093
2094 #[test]
2095 fn explicit_ignore_exclusive_use() {
2096 let td = tmpdir();
2097 let igpath = td.path().join(".not-an-ignore");
2098 mkdirp(td.path().join("a"));
2099 wfile(&igpath, "foo");
2100 wfile(td.path().join("foo"), "");
2101 wfile(td.path().join("a/foo"), "");
2102 wfile(td.path().join("bar"), "");
2103 wfile(td.path().join("a/bar"), "");
2104
2105 let mut builder = WalkBuilder::new(td.path());
2106 builder.standard_filters(false);
2107 assert!(builder.add_ignore(&igpath).is_none());
2108 assert_paths(
2109 td.path(),
2110 &builder,
2111 &[".not-an-ignore", "bar", "a", "a/bar"],
2112 );
2113 }
2114
2115 #[test]
2116 fn gitignore_parent() {
2117 let td = tmpdir();
2118 mkdirp(td.path().join(".git"));
2119 mkdirp(td.path().join("a"));
2120 wfile(td.path().join(".gitignore"), "foo");
2121 wfile(td.path().join("a/foo"), "");
2122 wfile(td.path().join("a/bar"), "");
2123
2124 let root = td.path().join("a");
2125 assert_paths(&root, &WalkBuilder::new(&root), &["bar"]);
2126 }
2127
2128 #[test]
2129 fn max_depth() {
2130 let td = tmpdir();
2131 mkdirp(td.path().join("a/b/c"));
2132 wfile(td.path().join("foo"), "");
2133 wfile(td.path().join("a/foo"), "");
2134 wfile(td.path().join("a/b/foo"), "");
2135 wfile(td.path().join("a/b/c/foo"), "");
2136
2137 let mut builder = WalkBuilder::new(td.path());
2138 assert_paths(
2139 td.path(),
2140 &builder,
2141 &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
2142 );
2143 assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
2144 assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
2145 assert_paths(
2146 td.path(),
2147 builder.max_depth(Some(2)),
2148 &["a", "a/b", "foo", "a/foo"],
2149 );
2150 }
2151
2152 #[test]
2153 fn max_filesize() {
2154 let td = tmpdir();
2155 mkdirp(td.path().join("a/b"));
2156 wfile_size(td.path().join("foo"), 0);
2157 wfile_size(td.path().join("bar"), 400);
2158 wfile_size(td.path().join("baz"), 600);
2159 wfile_size(td.path().join("a/foo"), 600);
2160 wfile_size(td.path().join("a/bar"), 500);
2161 wfile_size(td.path().join("a/baz"), 200);
2162
2163 let mut builder = WalkBuilder::new(td.path());
2164 assert_paths(
2165 td.path(),
2166 &builder,
2167 &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
2168 );
2169 assert_paths(
2170 td.path(),
2171 builder.max_filesize(Some(0)),
2172 &["a", "a/b", "foo"],
2173 );
2174 assert_paths(
2175 td.path(),
2176 builder.max_filesize(Some(500)),
2177 &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
2178 );
2179 assert_paths(
2180 td.path(),
2181 builder.max_filesize(Some(50000)),
2182 &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
2183 );
2184 }
2185
2186 #[cfg(unix)] // because symlinks on windows are weird
2187 #[test]
2188 fn symlinks() {
2189 let td = tmpdir();
2190 mkdirp(td.path().join("a/b"));
2191 symlink(td.path().join("a/b"), td.path().join("z"));
2192 wfile(td.path().join("a/b/foo"), "");
2193
2194 let mut builder = WalkBuilder::new(td.path());
2195 assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
2196 assert_paths(
2197 td.path(),
2198 &builder.follow_links(true),
2199 &["a", "a/b", "a/b/foo", "z", "z/foo"],
2200 );
2201 }
2202
2203 #[cfg(unix)] // because symlinks on windows are weird
2204 #[test]
2205 fn first_path_not_symlink() {
2206 let td = tmpdir();
2207 mkdirp(td.path().join("foo"));
2208
2209 let dents = WalkBuilder::new(td.path().join("foo"))
2210 .build()
2211 .into_iter()
2212 .collect::<Result<Vec<_>, _>>()
2213 .unwrap();
2214 assert_eq!(1, dents.len());
2215 assert!(!dents[0].path_is_symlink());
2216
2217 let dents = walk_collect_entries_parallel(&WalkBuilder::new(
2218 td.path().join("foo"),
2219 ));
2220 assert_eq!(1, dents.len());
2221 assert!(!dents[0].path_is_symlink());
2222 }
2223
2224 #[cfg(unix)] // because symlinks on windows are weird
2225 #[test]
2226 fn symlink_loop() {
2227 let td = tmpdir();
2228 mkdirp(td.path().join("a/b"));
2229 symlink(td.path().join("a"), td.path().join("a/b/c"));
2230
2231 let mut builder = WalkBuilder::new(td.path());
2232 assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
2233 assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
2234 }
2235
2236 // It's a little tricky to test the 'same_file_system' option since
2237 // we need an environment with more than one file system. We adopt a
2238 // heuristic where /sys is typically a distinct volume on Linux and roll
2239 // with that.
2240 #[test]
2241 #[cfg(target_os = "linux")]
2242 fn same_file_system() {
2243 use super::device_num;
2244
2245 // If for some reason /sys doesn't exist or isn't a directory, just
2246 // skip this test.
2247 if !Path::new("/sys").is_dir() {
2248 return;
2249 }
2250
2251 // If our test directory actually isn't a different volume from /sys,
2252 // then this test is meaningless and we shouldn't run it.
2253 let td = tmpdir();
2254 if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
2255 return;
2256 }
2257
2258 mkdirp(td.path().join("same_file"));
2259 symlink("/sys", td.path().join("same_file").join("alink"));
2260
2261 // Create a symlink to sys and enable following symlinks. If the
2262 // same_file_system option doesn't work, then this probably will hit a
2263 // permission error. Otherwise, it should just skip over the symlink
2264 // completely.
2265 let mut builder = WalkBuilder::new(td.path());
2266 builder.follow_links(true).same_file_system(true);
2267 assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
2268 }
2269
2270 #[cfg(target_os = "linux")]
2271 #[test]
2272 fn no_read_permissions() {
2273 let dir_path = Path::new("/root");
2274
2275 // There's no /etc/sudoers.d, skip the test.
2276 if !dir_path.is_dir() {
2277 return;
2278 }
2279 // We're the root, so the test won't check what we want it to.
2280 if fs::read_dir(&dir_path).is_ok() {
2281 return;
2282 }
2283
2284 // Check that we can't descend but get an entry for the parent dir.
2285 let builder = WalkBuilder::new(&dir_path);
2286 assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
2287 }
2288
2289 #[test]
2290 fn filter() {
2291 let td = tmpdir();
2292 mkdirp(td.path().join("a/b/c"));
2293 mkdirp(td.path().join("x/y"));
2294 wfile(td.path().join("a/b/foo"), "");
2295 wfile(td.path().join("x/y/foo"), "");
2296
2297 assert_paths(
2298 td.path(),
2299 &WalkBuilder::new(td.path()),
2300 &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
2301 );
2302
2303 assert_paths(
2304 td.path(),
2305 &WalkBuilder::new(td.path())
2306 .filter_entry(|entry| entry.file_name() != OsStr::new("a")),
2307 &["x", "x/y", "x/y/foo"],
2308 );
2309 }
2310}
2311