1use std::cmp;
2use std::ffi::OsStr;
3use std::fmt;
4use std::fs::{self, FileType, Metadata};
5use std::io;
6use std::path::{Path, PathBuf};
7use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
8use std::sync::{Arc, Mutex};
9use std::thread;
10use std::time::Duration;
11use std::vec;
12
13use same_file::Handle;
14use walkdir::{self, WalkDir};
15
16use crate::dir::{Ignore, IgnoreBuilder};
17use crate::gitignore::GitignoreBuilder;
18use crate::overrides::Override;
19use crate::types::Types;
20use crate::{Error, PartialErrorBuilder};
21
/// A directory entry with a possible error attached.
///
/// The error typically refers to a problem parsing ignore files in a
/// particular directory.
#[derive(Clone, Debug)]
pub struct DirEntry {
    /// The underlying entry. See `DirEntryInner` for the sources an entry
    /// can originate from.
    dent: DirEntryInner,
    /// An error attached to this entry, if any. Exposed through `error()`.
    err: Option<Error>,
}
31
32impl DirEntry {
33 /// The full path that this entry represents.
34 pub fn path(&self) -> &Path {
35 self.dent.path()
36 }
37
38 /// The full path that this entry represents.
39 /// Analogous to [`path`], but moves ownership of the path.
40 ///
41 /// [`path`]: struct.DirEntry.html#method.path
42 pub fn into_path(self) -> PathBuf {
43 self.dent.into_path()
44 }
45
46 /// Whether this entry corresponds to a symbolic link or not.
47 pub fn path_is_symlink(&self) -> bool {
48 self.dent.path_is_symlink()
49 }
50
51 /// Returns true if and only if this entry corresponds to stdin.
52 ///
53 /// i.e., The entry has depth 0 and its file name is `-`.
54 pub fn is_stdin(&self) -> bool {
55 self.dent.is_stdin()
56 }
57
58 /// Return the metadata for the file that this entry points to.
59 pub fn metadata(&self) -> Result<Metadata, Error> {
60 self.dent.metadata()
61 }
62
63 /// Return the file type for the file that this entry points to.
64 ///
65 /// This entry doesn't have a file type if it corresponds to stdin.
66 pub fn file_type(&self) -> Option<FileType> {
67 self.dent.file_type()
68 }
69
70 /// Return the file name of this entry.
71 ///
72 /// If this entry has no file name (e.g., `/`), then the full path is
73 /// returned.
74 pub fn file_name(&self) -> &OsStr {
75 self.dent.file_name()
76 }
77
78 /// Returns the depth at which this entry was created relative to the root.
79 pub fn depth(&self) -> usize {
80 self.dent.depth()
81 }
82
83 /// Returns the underlying inode number if one exists.
84 ///
85 /// If this entry doesn't have an inode number, then `None` is returned.
86 #[cfg(unix)]
87 pub fn ino(&self) -> Option<u64> {
88 self.dent.ino()
89 }
90
91 /// Returns an error, if one exists, associated with processing this entry.
92 ///
93 /// An example of an error is one that occurred while parsing an ignore
94 /// file. Errors related to traversing a directory tree itself are reported
95 /// as part of yielding the directory entry, and not with this method.
96 pub fn error(&self) -> Option<&Error> {
97 self.err.as_ref()
98 }
99
100 /// Returns true if and only if this entry points to a directory.
101 pub(crate) fn is_dir(&self) -> bool {
102 self.dent.is_dir()
103 }
104
105 fn new_stdin() -> DirEntry {
106 DirEntry { dent: DirEntryInner::Stdin, err: None }
107 }
108
109 fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
110 DirEntry { dent: DirEntryInner::Walkdir(dent), err: err }
111 }
112
113 fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
114 DirEntry { dent: DirEntryInner::Raw(dent), err: err }
115 }
116}
117
/// DirEntryInner is the implementation of DirEntry.
///
/// It specifically represents three distinct sources of directory entries:
///
/// 1. From the walkdir crate.
/// 2. Special entries that represent things like stdin.
/// 3. From a path.
///
/// Specifically, (3) has to essentially re-create the DirEntry implementation
/// from WalkDir.
#[derive(Clone, Debug)]
enum DirEntryInner {
    /// The special entry for stdin. It carries no data; its accessors
    /// report the placeholder path `<stdin>` and a depth of 0.
    Stdin,
    /// An entry produced by the walkdir crate.
    Walkdir(walkdir::DirEntry),
    /// An entry assembled by this module as a `DirEntryRaw`.
    Raw(DirEntryRaw),
}
134
135impl DirEntryInner {
136 fn path(&self) -> &Path {
137 use self::DirEntryInner::*;
138 match *self {
139 Stdin => Path::new("<stdin>"),
140 Walkdir(ref x) => x.path(),
141 Raw(ref x) => x.path(),
142 }
143 }
144
145 fn into_path(self) -> PathBuf {
146 use self::DirEntryInner::*;
147 match self {
148 Stdin => PathBuf::from("<stdin>"),
149 Walkdir(x) => x.into_path(),
150 Raw(x) => x.into_path(),
151 }
152 }
153
154 fn path_is_symlink(&self) -> bool {
155 use self::DirEntryInner::*;
156 match *self {
157 Stdin => false,
158 Walkdir(ref x) => x.path_is_symlink(),
159 Raw(ref x) => x.path_is_symlink(),
160 }
161 }
162
163 fn is_stdin(&self) -> bool {
164 match *self {
165 DirEntryInner::Stdin => true,
166 _ => false,
167 }
168 }
169
170 fn metadata(&self) -> Result<Metadata, Error> {
171 use self::DirEntryInner::*;
172 match *self {
173 Stdin => {
174 let err = Error::Io(io::Error::new(
175 io::ErrorKind::Other,
176 "<stdin> has no metadata",
177 ));
178 Err(err.with_path("<stdin>"))
179 }
180 Walkdir(ref x) => x.metadata().map_err(|err| {
181 Error::Io(io::Error::from(err)).with_path(x.path())
182 }),
183 Raw(ref x) => x.metadata(),
184 }
185 }
186
187 fn file_type(&self) -> Option<FileType> {
188 use self::DirEntryInner::*;
189 match *self {
190 Stdin => None,
191 Walkdir(ref x) => Some(x.file_type()),
192 Raw(ref x) => Some(x.file_type()),
193 }
194 }
195
196 fn file_name(&self) -> &OsStr {
197 use self::DirEntryInner::*;
198 match *self {
199 Stdin => OsStr::new("<stdin>"),
200 Walkdir(ref x) => x.file_name(),
201 Raw(ref x) => x.file_name(),
202 }
203 }
204
205 fn depth(&self) -> usize {
206 use self::DirEntryInner::*;
207 match *self {
208 Stdin => 0,
209 Walkdir(ref x) => x.depth(),
210 Raw(ref x) => x.depth(),
211 }
212 }
213
214 #[cfg(unix)]
215 fn ino(&self) -> Option<u64> {
216 use self::DirEntryInner::*;
217 use walkdir::DirEntryExt;
218 match *self {
219 Stdin => None,
220 Walkdir(ref x) => Some(x.ino()),
221 Raw(ref x) => Some(x.ino()),
222 }
223 }
224
225 /// Returns true if and only if this entry points to a directory.
226 fn is_dir(&self) -> bool {
227 self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
228 }
229}
230
/// DirEntryRaw is essentially copied from the walkdir crate so that we can
/// build `DirEntry`s from whole cloth in the parallel iterator.
#[derive(Clone)]
struct DirEntryRaw {
    /// The path as reported by the `fs::ReadDir` iterator (even if it's a
    /// symbolic link).
    path: PathBuf,
    /// The file type. Necessary for recursive iteration, so store it.
    ty: FileType,
    /// Is set when this entry was created from a symbolic link and the user
    /// expects the iterator to follow symbolic links.
    follow_link: bool,
    /// The depth at which this entry was generated relative to the root.
    depth: usize,
    /// The underlying inode number (Unix only).
    #[cfg(unix)]
    ino: u64,
    /// The underlying metadata (Windows only). We store this on Windows
    /// because this comes for free while reading a directory.
    #[cfg(windows)]
    metadata: fs::Metadata,
}

/// Hand-written `Debug` that prints only `path`, `follow_link` and `depth`.
impl fmt::Debug for DirEntryRaw {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Leaving out FileType because it doesn't have a debug impl
        // in Rust 1.9. We could add it if we really wanted to by manually
        // querying each possible file type. Meh. ---AG
        //
        // The platform-specific fields (`ino`, `metadata`) are left out too,
        // which keeps the output identical on every platform.
        f.debug_struct("DirEntryRaw")
            .field("path", &self.path)
            .field("follow_link", &self.follow_link)
            .field("depth", &self.depth)
            .finish()
    }
}
266
267impl DirEntryRaw {
268 fn path(&self) -> &Path {
269 &self.path
270 }
271
272 fn into_path(self) -> PathBuf {
273 self.path
274 }
275
276 fn path_is_symlink(&self) -> bool {
277 self.ty.is_symlink() || self.follow_link
278 }
279
280 fn metadata(&self) -> Result<Metadata, Error> {
281 self.metadata_internal()
282 }
283
284 #[cfg(windows)]
285 fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
286 if self.follow_link {
287 fs::metadata(&self.path)
288 } else {
289 Ok(self.metadata.clone())
290 }
291 .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
292 }
293
294 #[cfg(not(windows))]
295 fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
296 if self.follow_link {
297 fs::metadata(&self.path)
298 } else {
299 fs::symlink_metadata(&self.path)
300 }
301 .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
302 }
303
304 fn file_type(&self) -> FileType {
305 self.ty
306 }
307
308 fn file_name(&self) -> &OsStr {
309 self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
310 }
311
312 fn depth(&self) -> usize {
313 self.depth
314 }
315
316 #[cfg(unix)]
317 fn ino(&self) -> u64 {
318 self.ino
319 }
320
321 fn from_entry(
322 depth: usize,
323 ent: &fs::DirEntry,
324 ) -> Result<DirEntryRaw, Error> {
325 let ty = ent.file_type().map_err(|err| {
326 let err = Error::Io(io::Error::from(err)).with_path(ent.path());
327 Error::WithDepth { depth: depth, err: Box::new(err) }
328 })?;
329 DirEntryRaw::from_entry_os(depth, ent, ty)
330 }
331
332 #[cfg(windows)]
333 fn from_entry_os(
334 depth: usize,
335 ent: &fs::DirEntry,
336 ty: fs::FileType,
337 ) -> Result<DirEntryRaw, Error> {
338 let md = ent.metadata().map_err(|err| {
339 let err = Error::Io(io::Error::from(err)).with_path(ent.path());
340 Error::WithDepth { depth: depth, err: Box::new(err) }
341 })?;
342 Ok(DirEntryRaw {
343 path: ent.path(),
344 ty: ty,
345 follow_link: false,
346 depth: depth,
347 metadata: md,
348 })
349 }
350
351 #[cfg(unix)]
352 fn from_entry_os(
353 depth: usize,
354 ent: &fs::DirEntry,
355 ty: fs::FileType,
356 ) -> Result<DirEntryRaw, Error> {
357 use std::os::unix::fs::DirEntryExt;
358
359 Ok(DirEntryRaw {
360 path: ent.path(),
361 ty: ty,
362 follow_link: false,
363 depth: depth,
364 ino: ent.ino(),
365 })
366 }
367
368 // Placeholder implementation to allow compiling on non-standard platforms
369 // (e.g. wasm32).
370 #[cfg(not(any(windows, unix)))]
371 fn from_entry_os(
372 depth: usize,
373 ent: &fs::DirEntry,
374 ty: fs::FileType,
375 ) -> Result<DirEntryRaw, Error> {
376 Err(Error::Io(io::Error::new(
377 io::ErrorKind::Other,
378 "unsupported platform",
379 )))
380 }
381
382 #[cfg(windows)]
383 fn from_path(
384 depth: usize,
385 pb: PathBuf,
386 link: bool,
387 ) -> Result<DirEntryRaw, Error> {
388 let md =
389 fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
390 Ok(DirEntryRaw {
391 path: pb,
392 ty: md.file_type(),
393 follow_link: link,
394 depth: depth,
395 metadata: md,
396 })
397 }
398
399 #[cfg(unix)]
400 fn from_path(
401 depth: usize,
402 pb: PathBuf,
403 link: bool,
404 ) -> Result<DirEntryRaw, Error> {
405 use std::os::unix::fs::MetadataExt;
406
407 let md =
408 fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
409 Ok(DirEntryRaw {
410 path: pb,
411 ty: md.file_type(),
412 follow_link: link,
413 depth: depth,
414 ino: md.ino(),
415 })
416 }
417
418 // Placeholder implementation to allow compiling on non-standard platforms
419 // (e.g. wasm32).
420 #[cfg(not(any(windows, unix)))]
421 fn from_path(
422 depth: usize,
423 pb: PathBuf,
424 link: bool,
425 ) -> Result<DirEntryRaw, Error> {
426 Err(Error::Io(io::Error::new(
427 io::ErrorKind::Other,
428 "unsupported platform",
429 )))
430 }
431}
432
433/// WalkBuilder builds a recursive directory iterator.
434///
435/// The builder supports a large number of configurable options. This includes
436/// specific glob overrides, file type matching, toggling whether hidden
437/// files are ignored or not, and of course, support for respecting gitignore
438/// files.
439///
440/// By default, all ignore files found are respected. This includes `.ignore`,
441/// `.gitignore`, `.git/info/exclude` and even your global gitignore
442/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
443///
444/// Some standard recursive directory options are also supported, such as
445/// limiting the recursive depth or whether to follow symbolic links (disabled
446/// by default).
447///
448/// # Ignore rules
449///
450/// There are many rules that influence whether a particular file or directory
451/// is skipped by this iterator. Those rules are documented here. Note that
452/// the rules assume a default configuration.
453///
454/// * First, glob overrides are checked. If a path matches a glob override,
455/// then matching stops. The path is then only skipped if the glob that matched
456/// the path is an ignore glob. (An override glob is a whitelist glob unless it
457/// starts with a `!`, in which case it is an ignore glob.)
458/// * Second, ignore files are checked. Ignore files currently only come from
459/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
460/// global gitignore file), plain `.ignore` files, which have the same format
461/// as gitignore files, or explicitly added ignore files. The precedence order
462/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
463/// finally explicitly added ignore files. Note that precedence between
464/// different types of ignore files is not impacted by the directory hierarchy;
465/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
466/// level, more nested ignore files have a higher precedence than less nested
467/// ignore files.
468/// * Third, if the previous step yields an ignore match, then all matching
469/// is stopped and the path is skipped. If it yields a whitelist match, then
470/// matching continues. A whitelist match can be overridden by a later matcher.
471/// * Fourth, unless the path is a directory, the file type matcher is run on
472/// the path. As above, if it yields an ignore match, then all matching is
473/// stopped and the path is skipped. If it yields a whitelist match, then
474/// matching continues.
475/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
476/// path is skipped.
477/// * Sixth, unless the path is a directory, the size of the file is compared
478/// against the max filesize limit. If it exceeds the limit, it is skipped.
479/// * Seventh, if the path has made it this far then it is yielded in the
480/// iterator.
#[derive(Clone)]
pub struct WalkBuilder {
    /// Root paths to traverse; seeded by `new` and extended by `add`.
    paths: Vec<PathBuf>,
    /// Builder for the ignore matcher. Most of the filter toggles on this
    /// type are forwarded to it.
    ig_builder: IgnoreBuilder,
    /// Maximum recursion depth; `None` imposes no restriction.
    max_depth: Option<usize>,
    /// Maximum file size; `None` imposes no restriction.
    max_filesize: Option<u64>,
    /// Whether symbolic links are followed.
    follow_links: bool,
    /// Whether traversal is confined to the file system of the root path.
    same_file_system: bool,
    /// Optional comparator for ordering entries (serial iterator only).
    sorter: Option<Sorter>,
    /// Thread count for the parallel iterator; `0` picks one automatically.
    threads: usize,
    /// Handle to skip during traversal; set by `skip_stdout`.
    skip: Option<Arc<Handle>>,
    /// Optional user predicate set by `filter_entry`.
    filter: Option<Filter>,
}
494
/// A user-supplied comparator for ordering directory entries.
///
/// The comparator is held in an `Arc` so the builder stays cheaply clonable.
#[derive(Clone)]
enum Sorter {
    /// Compares entries by file name only.
    ByName(
        Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>,
    ),
    /// Compares entries by their full path.
    ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
}
502
/// A user-supplied predicate over directory entries, installed through
/// `WalkBuilder::filter_entry`. Entries for which it returns `false` are
/// skipped.
#[derive(Clone)]
struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
505
506impl fmt::Debug for WalkBuilder {
507 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
508 f&mut DebugStruct<'_, '_>.debug_struct("WalkBuilder")
509 .field("paths", &self.paths)
510 .field("ig_builder", &self.ig_builder)
511 .field("max_depth", &self.max_depth)
512 .field("max_filesize", &self.max_filesize)
513 .field("follow_links", &self.follow_links)
514 .field("threads", &self.threads)
515 .field(name:"skip", &self.skip)
516 .finish()
517 }
518}
519
520impl WalkBuilder {
521 /// Create a new builder for a recursive directory iterator for the
522 /// directory given.
523 ///
524 /// Note that if you want to traverse multiple different directories, it
525 /// is better to call `add` on this builder than to create multiple
526 /// `Walk` values.
527 pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
528 WalkBuilder {
529 paths: vec![path.as_ref().to_path_buf()],
530 ig_builder: IgnoreBuilder::new(),
531 max_depth: None,
532 max_filesize: None,
533 follow_links: false,
534 same_file_system: false,
535 sorter: None,
536 threads: 0,
537 skip: None,
538 filter: None,
539 }
540 }
541
542 /// Build a new `Walk` iterator.
543 pub fn build(&self) -> Walk {
544 let follow_links = self.follow_links;
545 let max_depth = self.max_depth;
546 let sorter = self.sorter.clone();
547 let its = self
548 .paths
549 .iter()
550 .map(move |p| {
551 if p == Path::new("-") {
552 (p.to_path_buf(), None)
553 } else {
554 let mut wd = WalkDir::new(p);
555 wd = wd.follow_links(follow_links || p.is_file());
556 wd = wd.same_file_system(self.same_file_system);
557 if let Some(max_depth) = max_depth {
558 wd = wd.max_depth(max_depth);
559 }
560 if let Some(ref sorter) = sorter {
561 match sorter.clone() {
562 Sorter::ByName(cmp) => {
563 wd = wd.sort_by(move |a, b| {
564 cmp(a.file_name(), b.file_name())
565 });
566 }
567 Sorter::ByPath(cmp) => {
568 wd = wd.sort_by(move |a, b| {
569 cmp(a.path(), b.path())
570 });
571 }
572 }
573 }
574 (p.to_path_buf(), Some(WalkEventIter::from(wd)))
575 }
576 })
577 .collect::<Vec<_>>()
578 .into_iter();
579 let ig_root = self.ig_builder.build();
580 Walk {
581 its: its,
582 it: None,
583 ig_root: ig_root.clone(),
584 ig: ig_root.clone(),
585 max_filesize: self.max_filesize,
586 skip: self.skip.clone(),
587 filter: self.filter.clone(),
588 }
589 }
590
591 /// Build a new `WalkParallel` iterator.
592 ///
593 /// Note that this *doesn't* return something that implements `Iterator`.
594 /// Instead, the returned value must be run with a closure. e.g.,
595 /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
596 pub fn build_parallel(&self) -> WalkParallel {
597 WalkParallel {
598 paths: self.paths.clone().into_iter(),
599 ig_root: self.ig_builder.build(),
600 max_depth: self.max_depth,
601 max_filesize: self.max_filesize,
602 follow_links: self.follow_links,
603 same_file_system: self.same_file_system,
604 threads: self.threads,
605 skip: self.skip.clone(),
606 filter: self.filter.clone(),
607 }
608 }
609
610 /// Add a file path to the iterator.
611 ///
612 /// Each additional file path added is traversed recursively. This should
613 /// be preferred over building multiple `Walk` iterators since this
614 /// enables reusing resources across iteration.
615 pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
616 self.paths.push(path.as_ref().to_path_buf());
617 self
618 }
619
620 /// The maximum depth to recurse.
621 ///
622 /// The default, `None`, imposes no depth restriction.
623 pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
624 self.max_depth = depth;
625 self
626 }
627
628 /// Whether to follow symbolic links or not.
629 pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
630 self.follow_links = yes;
631 self
632 }
633
634 /// Whether to ignore files above the specified limit.
635 pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
636 self.max_filesize = filesize;
637 self
638 }
639
640 /// The number of threads to use for traversal.
641 ///
642 /// Note that this only has an effect when using `build_parallel`.
643 ///
644 /// The default setting is `0`, which chooses the number of threads
645 /// automatically using heuristics.
646 pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
647 self.threads = n;
648 self
649 }
650
651 /// Add a global ignore file to the matcher.
652 ///
653 /// This has lower precedence than all other sources of ignore rules.
654 ///
655 /// If there was a problem adding the ignore file, then an error is
656 /// returned. Note that the error may indicate *partial* failure. For
657 /// example, if an ignore file contains an invalid glob, all other globs
658 /// are still applied.
659 pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
660 let mut builder = GitignoreBuilder::new("");
661 let mut errs = PartialErrorBuilder::default();
662 errs.maybe_push(builder.add(path));
663 match builder.build() {
664 Ok(gi) => {
665 self.ig_builder.add_ignore(gi);
666 }
667 Err(err) => {
668 errs.push(err);
669 }
670 }
671 errs.into_error_option()
672 }
673
674 /// Add a custom ignore file name
675 ///
676 /// These ignore files have higher precedence than all other ignore files.
677 ///
678 /// When specifying multiple names, earlier names have lower precedence than
679 /// later names.
680 pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
681 &mut self,
682 file_name: S,
683 ) -> &mut WalkBuilder {
684 self.ig_builder.add_custom_ignore_filename(file_name);
685 self
686 }
687
688 /// Add an override matcher.
689 ///
690 /// By default, no override matcher is used.
691 ///
692 /// This overrides any previous setting.
693 pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
694 self.ig_builder.overrides(overrides);
695 self
696 }
697
698 /// Add a file type matcher.
699 ///
700 /// By default, no file type matcher is used.
701 ///
702 /// This overrides any previous setting.
703 pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
704 self.ig_builder.types(types);
705 self
706 }
707
708 /// Enables all the standard ignore filters.
709 ///
710 /// This toggles, as a group, all the filters that are enabled by default:
711 ///
712 /// - [hidden()](#method.hidden)
713 /// - [parents()](#method.parents)
714 /// - [ignore()](#method.ignore)
715 /// - [git_ignore()](#method.git_ignore)
716 /// - [git_global()](#method.git_global)
717 /// - [git_exclude()](#method.git_exclude)
718 ///
719 /// They may still be toggled individually after calling this function.
720 ///
721 /// This is (by definition) enabled by default.
722 pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
723 self.hidden(yes)
724 .parents(yes)
725 .ignore(yes)
726 .git_ignore(yes)
727 .git_global(yes)
728 .git_exclude(yes)
729 }
730
731 /// Enables ignoring hidden files.
732 ///
733 /// This is enabled by default.
734 pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
735 self.ig_builder.hidden(yes);
736 self
737 }
738
739 /// Enables reading ignore files from parent directories.
740 ///
741 /// If this is enabled, then .gitignore files in parent directories of each
742 /// file path given are respected. Otherwise, they are ignored.
743 ///
744 /// This is enabled by default.
745 pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
746 self.ig_builder.parents(yes);
747 self
748 }
749
750 /// Enables reading `.ignore` files.
751 ///
752 /// `.ignore` files have the same semantics as `gitignore` files and are
753 /// supported by search tools such as ripgrep and The Silver Searcher.
754 ///
755 /// This is enabled by default.
756 pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
757 self.ig_builder.ignore(yes);
758 self
759 }
760
761 /// Enables reading a global gitignore file, whose path is specified in
762 /// git's `core.excludesFile` config option.
763 ///
764 /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
765 /// does not exist or does not specify `core.excludesFile`, then
766 /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
767 /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
768 ///
769 /// This is enabled by default.
770 pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
771 self.ig_builder.git_global(yes);
772 self
773 }
774
775 /// Enables reading `.gitignore` files.
776 ///
777 /// `.gitignore` files have match semantics as described in the `gitignore`
778 /// man page.
779 ///
780 /// This is enabled by default.
781 pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
782 self.ig_builder.git_ignore(yes);
783 self
784 }
785
786 /// Enables reading `.git/info/exclude` files.
787 ///
788 /// `.git/info/exclude` files have match semantics as described in the
789 /// `gitignore` man page.
790 ///
791 /// This is enabled by default.
792 pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
793 self.ig_builder.git_exclude(yes);
794 self
795 }
796
797 /// Whether a git repository is required to apply git-related ignore
798 /// rules (global rules, .gitignore and local exclude rules).
799 ///
800 /// When disabled, git-related ignore rules are applied even when searching
801 /// outside a git repository.
802 pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
803 self.ig_builder.require_git(yes);
804 self
805 }
806
807 /// Process ignore files case insensitively
808 ///
809 /// This is disabled by default.
810 pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
811 self.ig_builder.ignore_case_insensitive(yes);
812 self
813 }
814
815 /// Set a function for sorting directory entries by their path.
816 ///
817 /// If a compare function is set, the resulting iterator will return all
818 /// paths in sorted order. The compare function will be called to compare
819 /// entries from the same directory.
820 ///
821 /// This is like `sort_by_file_name`, except the comparator accepts
822 /// a `&Path` instead of the base file name, which permits it to sort by
823 /// more criteria.
824 ///
825 /// This method will override any previous sorter set by this method or
826 /// by `sort_by_file_name`.
827 ///
828 /// Note that this is not used in the parallel iterator.
829 pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
830 where
831 F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
832 {
833 self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
834 self
835 }
836
837 /// Set a function for sorting directory entries by file name.
838 ///
839 /// If a compare function is set, the resulting iterator will return all
840 /// paths in sorted order. The compare function will be called to compare
841 /// names from entries from the same directory using only the name of the
842 /// entry.
843 ///
844 /// This method will override any previous sorter set by this method or
845 /// by `sort_by_file_path`.
846 ///
847 /// Note that this is not used in the parallel iterator.
848 pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
849 where
850 F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
851 {
852 self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
853 self
854 }
855
856 /// Do not cross file system boundaries.
857 ///
858 /// When this option is enabled, directory traversal will not descend into
859 /// directories that are on a different file system from the root path.
860 ///
861 /// Currently, this option is only supported on Unix and Windows. If this
862 /// option is used on an unsupported platform, then directory traversal
863 /// will immediately return an error and will not yield any entries.
864 pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
865 self.same_file_system = yes;
866 self
867 }
868
869 /// Do not yield directory entries that are believed to correspond to
870 /// stdout.
871 ///
872 /// This is useful when a command is invoked via shell redirection to a
873 /// file that is also being read. For example, `grep -r foo ./ > results`
874 /// might end up trying to search `results` even though it is also writing
875 /// to it, which could cause an unbounded feedback loop. Setting this
876 /// option prevents this from happening by skipping over the `results`
877 /// file.
878 ///
879 /// This is disabled by default.
880 pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
881 if yes {
882 self.skip = stdout_handle().map(Arc::new);
883 } else {
884 self.skip = None;
885 }
886 self
887 }
888
889 /// Yields only entries which satisfy the given predicate and skips
890 /// descending into directories that do not satisfy the given predicate.
891 ///
892 /// The predicate is applied to all entries. If the predicate is
893 /// true, iteration carries on as normal. If the predicate is false, the
894 /// entry is ignored and if it is a directory, it is not descended into.
895 ///
896 /// Note that the errors for reading entries that may not satisfy the
897 /// predicate will still be yielded.
898 pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
899 where
900 P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
901 {
902 self.filter = Some(Filter(Arc::new(filter)));
903 self
904 }
905}
906
/// Walk is a recursive directory iterator over file paths in one or more
/// directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and their precedence are explained in the documentation for `WalkBuilder`.
pub struct Walk {
    /// The remaining roots to traverse. A root paired with `None` stands
    /// for stdin and has no directory walker.
    its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
    /// The walker for the root currently being traversed, if any.
    it: Option<WalkEventIter>,
    /// The ignore matcher as built by the builder, before any root-specific
    /// state is added.
    ig_root: Ignore,
    /// The ignore matcher for the directory currently being traversed.
    ig: Ignore,
    /// Size limit for files; `None` imposes no restriction.
    max_filesize: Option<u64>,
    /// Handle to skip during traversal, if any.
    skip: Option<Arc<Handle>>,
    /// Optional user predicate deciding which entries are kept.
    filter: Option<Filter>,
}
922
923impl Walk {
924 /// Creates a new recursive directory iterator for the file path given.
925 ///
926 /// Note that this uses default settings, which include respecting
927 /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
928 /// instead.
929 pub fn new<P: AsRef<Path>>(path: P) -> Walk {
930 WalkBuilder::new(path).build()
931 }
932
933 fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
934 if ent.depth() == 0 {
935 return Ok(false);
936 }
937 // We ensure that trivial skipping is done before any other potentially
938 // expensive operations (stat, filesystem other) are done. This seems
939 // like an obvious optimization but becomes critical when filesystem
940 // operations even as simple as stat can result in significant
941 // overheads; an example of this was a bespoke filesystem layer in
942 // Windows that hosted files remotely and would download them on-demand
943 // when particular filesystem operations occurred. Users of this system
944 // who ensured correct file-type filters were being used could still
945 // get unnecessary file access resulting in large downloads.
946 if should_skip_entry(&self.ig, ent) {
947 return Ok(true);
948 }
949 if let Some(ref stdout) = self.skip {
950 if path_equals(ent, stdout)? {
951 return Ok(true);
952 }
953 }
954 if self.max_filesize.is_some() && !ent.is_dir() {
955 return Ok(skip_filesize(
956 self.max_filesize.unwrap(),
957 ent.path(),
958 &ent.metadata().ok(),
959 ));
960 }
961 if let Some(Filter(filter)) = &self.filter {
962 if !filter(ent) {
963 return Ok(true);
964 }
965 }
966 Ok(false)
967 }
968}
969
/// Yields the entries of every root in turn, applying the skip rules and
/// keeping the ignore matcher in step with the directory being traversed.
impl Iterator for Walk {
    type Item = Result<DirEntry, Error>;

    #[inline(always)]
    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
        loop {
            // Pull the next event from the current walker. When there is no
            // walker, or it is exhausted, move on to the next root.
            let ev = match self.it.as_mut().and_then(|it| it.next()) {
                Some(ev) => ev,
                None => {
                    match self.its.next() {
                        // No roots left: the walk is finished.
                        None => return None,
                        // A root without a walker stands for stdin.
                        Some((_, None)) => {
                            return Some(Ok(DirEntry::new_stdin()));
                        }
                        Some((path, Some(it))) => {
                            self.it = Some(it);
                            // For a directory root, start from a matcher
                            // that includes its parent directories;
                            // otherwise start from the root matcher as is.
                            if path.is_dir() {
                                let (ig, err) = self.ig_root.add_parents(path);
                                self.ig = ig;
                                if let Some(err) = err {
                                    return Some(Err(err));
                                }
                            } else {
                                self.ig = self.ig_root.clone();
                            }
                        }
                    }
                    continue;
                }
            };
            match ev {
                Err(err) => {
                    return Some(Err(Error::from_walkdir(err)));
                }
                Ok(WalkEvent::Exit) => {
                    // Leaving a directory: go back to the parent's matcher.
                    // Every directory event below pushes a child matcher, so
                    // a parent is expected to exist here.
                    self.ig = self.ig.parent().unwrap();
                }
                Ok(WalkEvent::Dir(ent)) => {
                    let mut ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        self.it.as_mut().unwrap().it.skip_current_dir();
                        // Still need to push this on the stack because
                        // we'll get a WalkEvent::Exit event for this dir.
                        // We don't care if it errors though.
                        let (igtmp, _) = self.ig.add_child(ent.path());
                        self.ig = igtmp;
                        continue;
                    }
                    // Descend: push the matcher for this directory and
                    // attach any error from building it to the entry.
                    let (igtmp, err) = self.ig.add_child(ent.path());
                    self.ig = igtmp;
                    ent.err = err;
                    return Some(Ok(ent));
                }
                Ok(WalkEvent::File(ent)) => {
                    let ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        continue;
                    }
                    return Some(Ok(ent));
                }
            }
        }
    }
}
1042
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
    /// The depth this iterator currently considers itself to be at. It is
    /// compared against the depth of each incoming item to decide whether
    /// an "exit" event must be emitted first.
    depth: usize,
    /// The underlying walkdir iterator.
    it: walkdir::IntoIter,
    /// An item already pulled from `it` that is being held back while
    /// "exit" events are emitted; it is consumed before `it` is polled
    /// again.
    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
}
1052
/// A single event produced by `WalkEventIter`.
#[derive(Debug)]
enum WalkEvent {
    /// An entry that is treated as a directory.
    Dir(walkdir::DirEntry),
    /// Any entry that is not treated as a directory.
    File(walkdir::DirEntry),
    /// Emitted when the iterator moves back up one directory level.
    Exit,
}
1059
1060impl From<WalkDir> for WalkEventIter {
1061 fn from(it: WalkDir) -> WalkEventIter {
1062 WalkEventIter { depth: 0, it: it.into_iter(), next: None }
1063 }
1064}
1065
1066impl Iterator for WalkEventIter {
1067 type Item = walkdir::Result<WalkEvent>;
1068
1069 #[inline(always)]
1070 fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1071 let dent = self.next.take().or_else(|| self.it.next());
1072 let depth = match dent {
1073 None => 0,
1074 Some(Ok(ref dent)) => dent.depth(),
1075 Some(Err(ref err)) => err.depth(),
1076 };
1077 if depth < self.depth {
1078 self.depth -= 1;
1079 self.next = dent;
1080 return Some(Ok(WalkEvent::Exit));
1081 }
1082 self.depth = depth;
1083 match dent {
1084 None => None,
1085 Some(Err(err)) => Some(Err(err)),
1086 Some(Ok(dent)) => {
1087 if walkdir_is_dir(&dent) {
1088 self.depth += 1;
1089 Some(Ok(WalkEvent::Dir(dent)))
1090 } else {
1091 Some(Ok(WalkEvent::File(dent)))
1092 }
1093 }
1094 }
1095 }
1096}
1097
/// The verdict a visitor returns to the parallel recursive directory
/// iterator: keep walking, do not descend into the current directory, or
/// stop the walk altogether.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WalkState {
    /// Keep walking as usual.
    Continue,
    /// Do not descend into the given entry if it is a directory.
    /// For any other kind of entry this has no effect.
    Skip,
    /// Stop the entire iterator as soon as possible.
    ///
    /// Quitting is inherently asynchronous, so more entries may still be
    /// yielded after the iterator has been told to quit.
    Quit,
}

impl WalkState {
    /// Returns true if and only if this state is `Continue`.
    fn is_continue(&self) -> bool {
        matches!(self, WalkState::Continue)
    }

    /// Returns true if and only if this state is `Quit`.
    fn is_quit(&self) -> bool {
        matches!(self, WalkState::Quit)
    }
}
1125
/// A builder for constructing a visitor when using
/// [`WalkParallel::visit`](struct.WalkParallel.html#method.visit). The builder
/// will be called for each thread started by `WalkParallel`. The visitor
/// returned from each builder is then called for every directory entry.
///
/// The lifetime `'s` bounds what the produced visitors may borrow.
pub trait ParallelVisitorBuilder<'s> {
    /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
    ///
    /// Each call returns a new boxed visitor.
    fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
}
1134
1135impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
1136 for &'a mut P
1137{
1138 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1139 (**self).build()
1140 }
1141}
1142
/// Receives files and directories for the current thread.
///
/// Setup for the traversal can be implemented as part of
/// [`ParallelVisitorBuilder::build`](trait.ParallelVisitorBuilder.html#tymethod.build).
/// Teardown when traversal finishes can be implemented by implementing the
/// `Drop` trait on your traversal type.
///
/// Visitors must be `Send` because each one is moved to a worker thread.
pub trait ParallelVisitor: Send {
    /// Receives files and directories for the current thread. This is called
    /// once for every directory entry visited by traversal.
    ///
    /// Errors encountered during traversal are delivered through the same
    /// method as `Err` values. The returned `WalkState` tells the walker
    /// how to proceed.
    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
}
1154
/// Adapts a closure that produces visitor closures into a
/// `ParallelVisitorBuilder`.
struct FnBuilder<F> {
    /// Called once per `build` to obtain a new visitor closure.
    builder: F,
}
1158
1159impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
1160 for FnBuilder<F>
1161{
1162 fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1163 let visitor: Box) -> … + Send> = (self.builder)();
1164 Box::new(FnVisitorImp { visitor })
1165 }
1166}
1167
/// A boxed, sendable visitor closure: it receives each entry (or error) and
/// returns the `WalkState` that tells the walker how to proceed.
type FnVisitor<'s> =
    Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
1170
/// Wraps a visitor closure so that it can be used as a `ParallelVisitor`.
struct FnVisitorImp<'s> {
    /// The closure invoked for every entry.
    visitor: FnVisitor<'s>,
}
1174
1175impl<'s> ParallelVisitor for FnVisitorImp<'s> {
1176 fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
1177 (self.visitor)(entry)
1178 }
1179}
1180
/// WalkParallel is a parallel recursive directory iterator over file paths
/// in one or more directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and precedence are explained in the documentation for `WalkBuilder`.
///
/// Unlike `Walk`, this uses multiple threads for traversing a directory.
pub struct WalkParallel {
    /// The root paths to walk. A path equal to `-` is treated as stdin.
    paths: vec::IntoIter<PathBuf>,
    /// The ignore matcher that every root work item starts from.
    ig_root: Ignore,
    /// Passed to every worker: the maximum size (in bytes) of a file that
    /// is not skipped.
    max_filesize: Option<u64>,
    /// Passed to every worker: the maximum depth to descend to.
    max_depth: Option<usize>,
    /// Passed to every worker: whether symbolic links are followed.
    follow_links: bool,
    /// When true, the device number of each root is recorded and passed
    /// along with the work produced from that root.
    same_file_system: bool,
    /// The number of worker threads. A value of `0` selects the default
    /// (see `threads()`).
    threads: usize,
    /// Passed to every worker: an optional file handle whose file should
    /// be skipped.
    skip: Option<Arc<Handle>>,
    /// Passed to every worker: an optional predicate applied to entries.
    filter: Option<Filter>,
}
1200
1201impl WalkParallel {
1202 /// Execute the parallel recursive directory iterator. `mkf` is called
1203 /// for each thread used for iteration. The function produced by `mkf`
1204 /// is then in turn called for each visited file path.
1205 pub fn run<'s, F>(self, mkf: F)
1206 where
1207 F: FnMut() -> FnVisitor<'s>,
1208 {
1209 self.visit(&mut FnBuilder { builder: mkf })
1210 }
1211
1212 /// Execute the parallel recursive directory iterator using a custom
1213 /// visitor.
1214 ///
1215 /// The builder given is used to construct a visitor for every thread
1216 /// used by this traversal. The visitor returned from each builder is then
1217 /// called for every directory entry seen by that thread.
1218 ///
1219 /// Typically, creating a custom visitor is useful if you need to perform
1220 /// some kind of cleanup once traversal is finished. This can be achieved
1221 /// by implementing `Drop` for your builder (or for your visitor, if you
1222 /// want to execute cleanup for every thread that is launched).
1223 ///
1224 /// For example, each visitor might build up a data structure of results
1225 /// corresponding to the directory entries seen for each thread. Since each
1226 /// visitor runs on only one thread, this build-up can be done without
1227 /// synchronization. Then, once traversal is complete, all of the results
1228 /// can be merged together into a single data structure.
1229 pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
1230 let threads = self.threads();
1231 let stack = Arc::new(Mutex::new(vec![]));
1232 {
1233 let mut stack = stack.lock().unwrap();
1234 let mut visitor = builder.build();
1235 let mut paths = Vec::new().into_iter();
1236 std::mem::swap(&mut paths, &mut self.paths);
1237 // Send the initial set of root paths to the pool of workers. Note
1238 // that we only send directories. For files, we send to them the
1239 // callback directly.
1240 for path in paths {
1241 let (dent, root_device) = if path == Path::new("-") {
1242 (DirEntry::new_stdin(), None)
1243 } else {
1244 let root_device = if !self.same_file_system {
1245 None
1246 } else {
1247 match device_num(&path) {
1248 Ok(root_device) => Some(root_device),
1249 Err(err) => {
1250 let err = Error::Io(err).with_path(path);
1251 if visitor.visit(Err(err)).is_quit() {
1252 return;
1253 }
1254 continue;
1255 }
1256 }
1257 };
1258 match DirEntryRaw::from_path(0, path, false) {
1259 Ok(dent) => {
1260 (DirEntry::new_raw(dent, None), root_device)
1261 }
1262 Err(err) => {
1263 if visitor.visit(Err(err)).is_quit() {
1264 return;
1265 }
1266 continue;
1267 }
1268 }
1269 };
1270 stack.push(Message::Work(Work {
1271 dent: dent,
1272 ignore: self.ig_root.clone(),
1273 root_device: root_device,
1274 }));
1275 }
1276 // ... but there's no need to start workers if we don't need them.
1277 if stack.is_empty() {
1278 return;
1279 }
1280 }
1281 // Create the workers and then wait for them to finish.
1282 let quit_now = Arc::new(AtomicBool::new(false));
1283 let num_pending =
1284 Arc::new(AtomicUsize::new(stack.lock().unwrap().len()));
1285 std::thread::scope(|s| {
1286 let mut handles = vec![];
1287 for _ in 0..threads {
1288 let worker = Worker {
1289 visitor: builder.build(),
1290 stack: stack.clone(),
1291 quit_now: quit_now.clone(),
1292 num_pending: num_pending.clone(),
1293 max_depth: self.max_depth,
1294 max_filesize: self.max_filesize,
1295 follow_links: self.follow_links,
1296 skip: self.skip.clone(),
1297 filter: self.filter.clone(),
1298 };
1299 handles.push(s.spawn(|| worker.run()));
1300 }
1301 for handle in handles {
1302 handle.join().unwrap();
1303 }
1304 });
1305 }
1306
1307 fn threads(&self) -> usize {
1308 if self.threads == 0 {
1309 2
1310 } else {
1311 self.threads
1312 }
1313 }
1314}
1315
/// Message is the set of instructions that a worker knows how to process.
enum Message {
    /// A work item corresponds to a directory that should be descended into.
    /// Work items for entries that should be skipped or ignored should not
    /// be produced.
    ///
    /// Note that, as used in this file, work items are also produced for
    /// entries that are not directories; a worker hands those straight to
    /// the visitor (see `Worker::run_one`).
    Work(Work),
    /// This instruction indicates that the worker should quit.
    Quit,
}
1325
/// A unit of work for each worker to process.
///
/// Each unit of work corresponds to a directory that should be descended
/// into, or to a non-directory entry that only needs to be reported to the
/// visitor.
struct Work {
    /// The directory entry.
    dent: DirEntry,
    /// Any ignore matchers that have been built for this directory's parents.
    /// After `read_dir` succeeds, this also includes the matcher for this
    /// directory itself.
    ignore: Ignore,
    /// The root device number. When present, only files with the same device
    /// number should be considered.
    root_device: Option<u64>,
}
1339
1340impl Work {
1341 /// Returns true if and only if this work item is a directory.
1342 fn is_dir(&self) -> bool {
1343 self.dent.is_dir()
1344 }
1345
1346 /// Returns true if and only if this work item is a symlink.
1347 fn is_symlink(&self) -> bool {
1348 self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1349 }
1350
1351 /// Adds ignore rules for parent directories.
1352 ///
1353 /// Note that this only applies to entries at depth 0. On all other
1354 /// entries, this is a no-op.
1355 fn add_parents(&mut self) -> Option<Error> {
1356 if self.dent.depth() > 0 {
1357 return None;
1358 }
1359 // At depth 0, the path of this entry is a root path, so we can
1360 // use it directly to add parent ignore rules.
1361 let (ig, err) = self.ignore.add_parents(self.dent.path());
1362 self.ignore = ig;
1363 err
1364 }
1365
1366 /// Reads the directory contents of this work item and adds ignore
1367 /// rules for this directory.
1368 ///
1369 /// If there was a problem with reading the directory contents, then
1370 /// an error is returned. If there was a problem reading the ignore
1371 /// rules for this directory, then the error is attached to this
1372 /// work item's directory entry.
1373 fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1374 let readdir = match fs::read_dir(self.dent.path()) {
1375 Ok(readdir) => readdir,
1376 Err(err) => {
1377 let err = Error::from(err)
1378 .with_path(self.dent.path())
1379 .with_depth(self.dent.depth());
1380 return Err(err);
1381 }
1382 };
1383 let (ig, err) = self.ignore.add_child(self.dent.path());
1384 self.ignore = ig;
1385 self.dent.err = err;
1386 Ok(readdir)
1387 }
1388}
1389
/// A worker is responsible for descending into directories, updating the
/// ignore matchers, producing new work and invoking the caller's callback.
///
/// Note that a worker is *both* a producer and a consumer.
struct Worker<'s> {
    /// The caller's callback.
    visitor: Box<dyn ParallelVisitor + 's>,
    /// A stack of work to do.
    ///
    /// We use a stack instead of a channel because a stack lets us visit
    /// directories in depth first order. This can substantially reduce peak
    /// memory usage by keeping both the number of file paths and gitignore
    /// matchers in memory lower.
    stack: Arc<Mutex<Vec<Message>>>,
    /// Whether all workers should terminate at the next opportunity. Note
    /// that we need this because we don't want other `Work` to be done after
    /// we quit. We wouldn't need this if we had a priority channel.
    quit_now: Arc<AtomicBool>,
    /// The number of outstanding work items.
    num_pending: Arc<AtomicUsize>,
    /// The maximum depth of directories to descend. A value of `0` means no
    /// descent at all.
    max_depth: Option<usize>,
    /// The maximum size a searched file can be (in bytes). If a file exceeds
    /// this size it will be skipped.
    max_filesize: Option<u64>,
    /// Whether to follow symbolic links or not. When this is enabled, loop
    /// detection is performed.
    follow_links: bool,
    /// A file handle to skip, currently is either `None` or stdout, if it's
    /// a file and it has been requested to skip files identical to stdout.
    skip: Option<Arc<Handle>>,
    /// A predicate applied to dir entries. If it returns false, the entry
    /// is skipped: no work is produced for it, so a skipped directory is
    /// never descended into.
    filter: Option<Filter>,
}
1426
impl<'s> Worker<'s> {
    /// Runs this worker until there is no more work left to do.
    ///
    /// The worker will call the caller's callback for all entries that aren't
    /// skipped by the ignore matcher.
    fn run(mut self) {
        while let Some(work) = self.get_work() {
            if let WalkState::Quit = self.run_one(work) {
                self.quit_now();
            }
            // The item counts as finished whatever state was returned.
            self.work_done();
        }
    }

    /// Processes a single work item and returns the resulting walk state.
    ///
    /// Entries that are symlinks or not directories are handed straight to
    /// the visitor. For a directory, parent ignore rules are added (depth 0
    /// only), the same-file-system check is applied when a root device is
    /// present, the directory itself is reported to the visitor, and then
    /// every child is passed to `generate_work`.
    fn run_one(&mut self, mut work: Work) -> WalkState {
        // If the work is not a directory, then we can just execute the
        // caller's callback immediately and move on.
        if work.is_symlink() || !work.is_dir() {
            return self.visitor.visit(Ok(work.dent));
        }
        if let Some(err) = work.add_parents() {
            let state = self.visitor.visit(Err(err));
            if state.is_quit() {
                return state;
            }
        }

        // Decide whether this directory may be descended into. Without a
        // root device there is no restriction. If the device check itself
        // fails, the error is reported and the directory is not descended.
        let descend = if let Some(root_device) = work.root_device {
            match is_same_file_system(root_device, work.dent.path()) {
                Ok(true) => true,
                Ok(false) => false,
                Err(err) => {
                    let state = self.visitor.visit(Err(err));
                    if state.is_quit() {
                        return state;
                    }
                    false
                }
            }
        } else {
            true
        };

        // Try to read the directory first before we transfer ownership
        // to the provided closure. Do not unwrap it immediately, though,
        // as we may receive an `Err` value e.g. in the case when we do not
        // have sufficient read permissions to list the directory.
        // In that case we still want to provide the closure with a valid
        // entry before passing the error value.
        let readdir = work.read_dir();
        let depth = work.dent.depth();
        let state = self.visitor.visit(Ok(work.dent));
        if !state.is_continue() {
            return state;
        }
        if !descend {
            return WalkState::Skip;
        }

        let readdir = match readdir {
            Ok(readdir) => readdir,
            Err(err) => {
                return self.visitor.visit(Err(err));
            }
        };

        // The directory itself has been reported above; its children are
        // only produced while the depth limit has not been reached.
        if self.max_depth.map_or(false, |max| depth >= max) {
            return WalkState::Skip;
        }
        for result in readdir {
            let state = self.generate_work(
                &work.ignore,
                depth + 1,
                work.root_device,
                result,
            );
            if state.is_quit() {
                return state;
            }
        }
        WalkState::Continue
    }

    /// Decides whether to submit the given directory entry as a file to
    /// search.
    ///
    /// If the entry is a path that should be ignored, then this is a no-op.
    /// Otherwise, the entry is pushed on to the queue. (The actual execution
    /// of the callback happens in `run_one`.)
    ///
    /// If an error occurs while reading the entry, then it is sent to the
    /// caller's callback.
    ///
    /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
    /// be the depth of this entry. `result` should be the item yielded by
    /// a directory iterator.
    fn generate_work(
        &mut self,
        ig: &Ignore,
        depth: usize,
        root_device: Option<u64>,
        result: Result<fs::DirEntry, io::Error>,
    ) -> WalkState {
        let fs_dent = match result {
            Ok(fs_dent) => fs_dent,
            Err(err) => {
                return self
                    .visitor
                    .visit(Err(Error::from(err).with_depth(depth)));
            }
        };
        let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
            Ok(dent) => DirEntry::new_raw(dent, None),
            Err(err) => {
                return self.visitor.visit(Err(err));
            }
        };
        let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
        if self.follow_links && is_symlink {
            // Rebuild the entry from its path with the "follow" flag set,
            // then make sure a directory target does not form a loop.
            let path = dent.path().to_path_buf();
            dent = match DirEntryRaw::from_path(depth, path, true) {
                Ok(dent) => DirEntry::new_raw(dent, None),
                Err(err) => {
                    return self.visitor.visit(Err(err));
                }
            };
            if dent.is_dir() {
                if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
                    return self.visitor.visit(Err(err));
                }
            }
        }
        // N.B. See analogous call in the single-threaded implementation about
        // why it's important for this to come before the checks below.
        if should_skip_entry(ig, &dent) {
            return WalkState::Continue;
        }
        if let Some(ref stdout) = self.skip {
            let is_stdout = match path_equals(&dent, stdout) {
                Ok(is_stdout) => is_stdout,
                Err(err) => return self.visitor.visit(Err(err)),
            };
            if is_stdout {
                return WalkState::Continue;
            }
        }
        // The size limit is only applied to entries that are not
        // directories.
        let should_skip_filesize =
            if self.max_filesize.is_some() && !dent.is_dir() {
                skip_filesize(
                    self.max_filesize.unwrap(),
                    dent.path(),
                    &dent.metadata().ok(),
                )
            } else {
                false
            };
        // An entry is filtered out when the predicate returns false.
        let should_skip_filtered =
            if let Some(Filter(predicate)) = &self.filter {
                !predicate(&dent)
            } else {
                false
            };
        if !should_skip_filesize && !should_skip_filtered {
            self.send(Work { dent, ignore: ig.clone(), root_device });
        }
        WalkState::Continue
    }

    /// Returns the next directory to descend into.
    ///
    /// If all work has been exhausted, then this returns None. The worker
    /// should then subsequently quit.
    fn get_work(&mut self) -> Option<Work> {
        let mut value = self.recv();
        loop {
            // Simulate a priority channel: If quit_now flag is set, we can
            // receive only quit messages.
            if self.is_quit_now() {
                value = Some(Message::Quit)
            }
            match value {
                Some(Message::Work(work)) => {
                    return Some(work);
                }
                Some(Message::Quit) => {
                    // Repeat quit message to wake up sleeping threads, if
                    // any. The domino effect will ensure that every thread
                    // will quit.
                    self.send_quit();
                    return None;
                }
                None => {
                    // Once num_pending reaches 0, it is impossible for it to
                    // ever increase again. Namely, it only reaches 0 once
                    // all jobs have run such that no jobs have produced more
                    // work. We have this guarantee because num_pending is
                    // always incremented before each job is submitted and only
                    // decremented once each job is completely finished.
                    // Therefore, if this reaches zero, then there can be no
                    // other job running.
                    if self.num_pending() == 0 {
                        // Every other thread is blocked at the next recv().
                        // Send the initial quit message and quit.
                        self.send_quit();
                        return None;
                    }
                    // Wait for next `Work` or `Quit` message.
                    loop {
                        if let Some(v) = self.recv() {
                            value = Some(v);
                            break;
                        }
                        // Our stack isn't blocking. Instead of burning the
                        // CPU waiting, we let the thread sleep for a bit. In
                        // general, this tends to only occur once the search is
                        // approaching termination.
                        thread::sleep(Duration::from_millis(1));
                    }
                }
            }
        }
    }

    /// Indicates that all workers should quit immediately.
    fn quit_now(&self) {
        self.quit_now.store(true, Ordering::SeqCst);
    }

    /// Returns true if this worker should quit immediately.
    fn is_quit_now(&self) -> bool {
        self.quit_now.load(Ordering::SeqCst)
    }

    /// Returns the number of pending jobs.
    fn num_pending(&self) -> usize {
        self.num_pending.load(Ordering::SeqCst)
    }

    /// Send work.
    ///
    /// The pending counter is incremented before the item is pushed, so
    /// that the counter never under-reports outstanding work.
    fn send(&self, work: Work) {
        self.num_pending.fetch_add(1, Ordering::SeqCst);
        let mut stack = self.stack.lock().unwrap();
        stack.push(Message::Work(work));
    }

    /// Send a quit message.
    fn send_quit(&self) {
        let mut stack = self.stack.lock().unwrap();
        stack.push(Message::Quit);
    }

    /// Receive work.
    ///
    /// This does not block: it returns `None` when the stack is empty.
    fn recv(&self) -> Option<Message> {
        let mut stack = self.stack.lock().unwrap();
        stack.pop()
    }

    /// Signal that a work item has been completely processed by
    /// decrementing the pending counter.
    fn work_done(&self) {
        self.num_pending.fetch_sub(1, Ordering::SeqCst);
    }
}
1689
1690fn check_symlink_loop(
1691 ig_parent: &Ignore,
1692 child_path: &Path,
1693 child_depth: usize,
1694) -> Result<(), Error> {
1695 let hchild: Handle = Handle::from_path(child_path).map_err(|err: Error| {
1696 Error::from(err).with_path(child_path).with_depth(child_depth)
1697 })?;
1698 for ig: &Ignore in ig_parent.parents().take_while(|ig: &&Ignore| !ig.is_absolute_parent()) {
1699 let h: Handle = Handle::from_path(ig.path()).map_err(|err: Error| {
1700 Error::from(err).with_path(child_path).with_depth(child_depth)
1701 })?;
1702 if hchild == h {
1703 return Err(Error::Loop {
1704 ancestor: ig.path().to_path_buf(),
1705 child: child_path.to_path_buf(),
1706 }
1707 .with_depth(child_depth));
1708 }
1709 }
1710 Ok(())
1711}
1712
1713// Before calling this function, make sure that you ensure that is really
1714// necessary as the arguments imply a file stat.
1715fn skip_filesize(
1716 max_filesize: u64,
1717 path: &Path,
1718 ent: &Option<Metadata>,
1719) -> bool {
1720 let filesize: Option = match *ent {
1721 Some(ref md: &Metadata) => Some(md.len()),
1722 None => None,
1723 };
1724
1725 if let Some(fs: u64) = filesize {
1726 if fs > max_filesize {
1727 log::debug!("ignoring {}: {} bytes", path.display(), fs);
1728 true
1729 } else {
1730 false
1731 }
1732 } else {
1733 false
1734 }
1735}
1736
1737fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1738 let m: Match> = ig.matched_dir_entry(dent);
1739 if m.is_ignore() {
1740 log::debug!("ignoring {}: {:?}", dent.path().display(), m);
1741 true
1742 } else if m.is_whitelist() {
1743 log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
1744 false
1745 } else {
1746 false
1747 }
1748}
1749
1750/// Returns a handle to stdout for filtering search.
1751///
1752/// A handle is returned if and only if stdout is being redirected to a file.
1753/// The handle returned corresponds to that file.
1754///
1755/// This can be used to ensure that we do not attempt to search a file that we
1756/// may also be writing to.
1757fn stdout_handle() -> Option<Handle> {
1758 let h: Handle = match Handle::stdout() {
1759 Err(_) => return None,
1760 Ok(h: Handle) => h,
1761 };
1762 let md: Metadata = match h.as_file().metadata() {
1763 Err(_) => return None,
1764 Ok(md: Metadata) => md,
1765 };
1766 if !md.is_file() {
1767 return None;
1768 }
1769 Some(h)
1770}
1771
1772/// Returns true if and only if the given directory entry is believed to be
1773/// equivalent to the given handle. If there was a problem querying the path
1774/// for information to determine equality, then that error is returned.
1775fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1776 #[cfg(unix)]
1777 fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1778 dent.ino() != Some(handle.ino())
1779 }
1780
1781 #[cfg(not(unix))]
1782 fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1783 false
1784 }
1785
1786 // If we know for sure that these two things aren't equal, then avoid
1787 // the costly extra stat call to determine equality.
1788 if dent.is_stdin() || never_equal(dent, handle) {
1789 return Ok(false);
1790 }
1791 Handle::from_path(dent.path())
1792 .map(|h| &h == handle)
1793 .map_err(|err: Error| Error::Io(err).with_path(dent.path()))
1794}
1795
1796/// Returns true if the given walkdir entry corresponds to a directory.
1797///
1798/// This is normally just `dent.file_type().is_dir()`, but when we aren't
1799/// following symlinks, the root directory entry may be a symlink to a
1800/// directory that we *do* follow---by virtue of it being specified by the user
1801/// explicitly. In that case, we need to follow the symlink and query whether
1802/// it's a directory or not. But we only do this for root entries to avoid an
1803/// additional stat check in most cases.
1804fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
1805 if dent.file_type().is_dir() {
1806 return true;
1807 }
1808 if !dent.file_type().is_symlink() || dent.depth() > 0 {
1809 return false;
1810 }
1811 dent.path().metadata().ok().map_or(default:false, |md: Metadata| md.file_type().is_dir())
1812}
1813
1814/// Returns true if and only if the given path is on the same device as the
1815/// given root device.
1816fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
1817 let dent_device: u64 =
1818 device_num(path).map_err(|err: Error| Error::Io(err).with_path(path))?;
1819 Ok(root_device == dent_device)
1820}
1821
/// Returns the device number reported by the metadata of `path`, or the
/// I/O error that occurred while querying that metadata.
#[cfg(unix)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    use std::os::unix::fs::MetadataExt;

    let md = path.as_ref().metadata()?;
    Ok(md.dev())
}
1828
/// Returns the volume serial number of the file at `path`, or the I/O
/// error that occurred while opening or querying it.
#[cfg(windows)]
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    let handle = winapi_util::Handle::from_path_any(path)?;
    let info = winapi_util::file::information(handle)?;
    Ok(info.volume_serial_number())
}
1836
/// Always fails: device numbers are not available on platforms that are
/// neither Unix nor Windows.
#[cfg(not(any(unix, windows)))]
fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
    let unsupported = io::Error::new(
        io::ErrorKind::Other,
        "walkdir: same_file_system option not supported on this platform",
    );
    Err(unsupported)
}
1844
1845#[cfg(test)]
1846mod tests {
1847 use std::ffi::OsStr;
1848 use std::fs::{self, File};
1849 use std::io::Write;
1850 use std::path::Path;
1851 use std::sync::{Arc, Mutex};
1852
1853 use super::{DirEntry, WalkBuilder, WalkState};
1854 use crate::tests::TempDir;
1855
1856 fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
1857 let mut file = File::create(path).unwrap();
1858 file.write_all(contents.as_bytes()).unwrap();
1859 }
1860
1861 fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
1862 let file = File::create(path).unwrap();
1863 file.set_len(size).unwrap();
1864 }
1865
1866 #[cfg(unix)]
1867 fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
1868 use std::os::unix::fs::symlink;
1869 symlink(src, dst).unwrap();
1870 }
1871
1872 fn mkdirp<P: AsRef<Path>>(path: P) {
1873 fs::create_dir_all(path).unwrap();
1874 }
1875
1876 fn normal_path(unix: &str) -> String {
1877 if cfg!(windows) {
1878 unix.replace("\\", "/")
1879 } else {
1880 unix.to_string()
1881 }
1882 }
1883
1884 fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
1885 let mut paths = vec![];
1886 for result in builder.build() {
1887 let dent = match result {
1888 Err(_) => continue,
1889 Ok(dent) => dent,
1890 };
1891 let path = dent.path().strip_prefix(prefix).unwrap();
1892 if path.as_os_str().is_empty() {
1893 continue;
1894 }
1895 paths.push(normal_path(path.to_str().unwrap()));
1896 }
1897 paths.sort();
1898 paths
1899 }
1900
1901 fn walk_collect_parallel(
1902 prefix: &Path,
1903 builder: &WalkBuilder,
1904 ) -> Vec<String> {
1905 let mut paths = vec![];
1906 for dent in walk_collect_entries_parallel(builder) {
1907 let path = dent.path().strip_prefix(prefix).unwrap();
1908 if path.as_os_str().is_empty() {
1909 continue;
1910 }
1911 paths.push(normal_path(path.to_str().unwrap()));
1912 }
1913 paths.sort();
1914 paths
1915 }
1916
1917 fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
1918 let dents = Arc::new(Mutex::new(vec![]));
1919 builder.build_parallel().run(|| {
1920 let dents = dents.clone();
1921 Box::new(move |result| {
1922 if let Ok(dent) = result {
1923 dents.lock().unwrap().push(dent);
1924 }
1925 WalkState::Continue
1926 })
1927 });
1928
1929 let dents = dents.lock().unwrap();
1930 dents.to_vec()
1931 }
1932
1933 fn mkpaths(paths: &[&str]) -> Vec<String> {
1934 let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
1935 paths.sort();
1936 paths
1937 }
1938
1939 fn tmpdir() -> TempDir {
1940 TempDir::new().unwrap()
1941 }
1942
1943 fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
1944 let got = walk_collect(prefix, builder);
1945 assert_eq!(got, mkpaths(expected), "single threaded");
1946 let got = walk_collect_parallel(prefix, builder);
1947 assert_eq!(got, mkpaths(expected), "parallel");
1948 }
1949
1950 #[test]
1951 fn no_ignores() {
1952 let td = tmpdir();
1953 mkdirp(td.path().join("a/b/c"));
1954 mkdirp(td.path().join("x/y"));
1955 wfile(td.path().join("a/b/foo"), "");
1956 wfile(td.path().join("x/y/foo"), "");
1957
1958 assert_paths(
1959 td.path(),
1960 &WalkBuilder::new(td.path()),
1961 &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
1962 );
1963 }
1964
1965 #[test]
1966 fn custom_ignore() {
1967 let td = tmpdir();
1968 let custom_ignore = ".customignore";
1969 mkdirp(td.path().join("a"));
1970 wfile(td.path().join(custom_ignore), "foo");
1971 wfile(td.path().join("foo"), "");
1972 wfile(td.path().join("a/foo"), "");
1973 wfile(td.path().join("bar"), "");
1974 wfile(td.path().join("a/bar"), "");
1975
1976 let mut builder = WalkBuilder::new(td.path());
1977 builder.add_custom_ignore_filename(&custom_ignore);
1978 assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
1979 }
1980
1981 #[test]
1982 fn custom_ignore_exclusive_use() {
1983 let td = tmpdir();
1984 let custom_ignore = ".customignore";
1985 mkdirp(td.path().join("a"));
1986 wfile(td.path().join(custom_ignore), "foo");
1987 wfile(td.path().join("foo"), "");
1988 wfile(td.path().join("a/foo"), "");
1989 wfile(td.path().join("bar"), "");
1990 wfile(td.path().join("a/bar"), "");
1991
1992 let mut builder = WalkBuilder::new(td.path());
1993 builder.ignore(false);
1994 builder.git_ignore(false);
1995 builder.git_global(false);
1996 builder.git_exclude(false);
1997 builder.add_custom_ignore_filename(&custom_ignore);
1998 assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
1999 }
2000
2001 #[test]
2002 fn gitignore() {
2003 let td = tmpdir();
2004 mkdirp(td.path().join(".git"));
2005 mkdirp(td.path().join("a"));
2006 wfile(td.path().join(".gitignore"), "foo");
2007 wfile(td.path().join("foo"), "");
2008 wfile(td.path().join("a/foo"), "");
2009 wfile(td.path().join("bar"), "");
2010 wfile(td.path().join("a/bar"), "");
2011
2012 assert_paths(
2013 td.path(),
2014 &WalkBuilder::new(td.path()),
2015 &["bar", "a", "a/bar"],
2016 );
2017 }
2018
2019 #[test]
2020 fn explicit_ignore() {
2021 let td = tmpdir();
2022 let igpath = td.path().join(".not-an-ignore");
2023 mkdirp(td.path().join("a"));
2024 wfile(&igpath, "foo");
2025 wfile(td.path().join("foo"), "");
2026 wfile(td.path().join("a/foo"), "");
2027 wfile(td.path().join("bar"), "");
2028 wfile(td.path().join("a/bar"), "");
2029
2030 let mut builder = WalkBuilder::new(td.path());
2031 assert!(builder.add_ignore(&igpath).is_none());
2032 assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2033 }
2034
#[test]
fn explicit_ignore_exclusive_use() {
    // Fixture: an ignore file with a non-standard name that names `foo`,
    // plus `foo`/`bar` files at two levels.
    let td = tmpdir();
    let root = td.path();
    let igpath = root.join(".not-an-ignore");
    mkdirp(root.join("a"));
    wfile(&igpath, "foo");
    for rel in &["foo", "a/foo", "bar", "a/bar"] {
        wfile(root.join(rel), "");
    }

    // Disable the standard filters first, then register the explicit
    // ignore file; adding it must not report an error.
    let mut builder = WalkBuilder::new(root);
    builder.standard_filters(false);
    assert!(builder.add_ignore(&igpath).is_none());

    // The `foo` entries are still expected to be filtered out, while the
    // dot-prefixed ignore file itself is expected in the listing.
    let expected = [".not-an-ignore", "bar", "a", "a/bar"];
    assert_paths(root, &builder, &expected);
}
2055
#[test]
fn gitignore_parent() {
    // Fixture: the `.gitignore` naming `foo` lives in the top directory
    // (next to `.git`), while the files live one level down in `a/`.
    let td = tmpdir();
    let top = td.path();
    mkdirp(top.join(".git"));
    mkdirp(top.join("a"));
    wfile(top.join(".gitignore"), "foo");
    wfile(top.join("a/foo"), "");
    wfile(top.join("a/bar"), "");

    // Start the walk in the subdirectory: `a/foo` is still expected to be
    // filtered out even though the ignore file sits in the parent.
    let root = top.join("a");
    let builder = WalkBuilder::new(&root);
    assert_paths(&root, &builder, &["bar"]);
}
2068
#[test]
fn max_depth() {
    // Fixture: a three-level directory chain with a `foo` file at every
    // level, including the root.
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a/b/c"));
    for rel in &["foo", "a/foo", "a/b/foo", "a/b/c/foo"] {
        wfile(root.join(rel), "");
    }

    let mut builder = WalkBuilder::new(root);

    // No depth limit configured: every entry is expected.
    assert_paths(
        root,
        &builder,
        &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
    );

    // Depth 0: the expected listing is empty.
    builder.max_depth(Some(0));
    assert_paths(root, &builder, &[]);

    // Depth 1: only the direct children of the root are expected.
    builder.max_depth(Some(1));
    assert_paths(root, &builder, &["a", "foo"]);

    // Depth 2: children and grandchildren of the root are expected.
    builder.max_depth(Some(2));
    assert_paths(root, &builder, &["a", "a/b", "foo", "a/foo"]);
}
2092
#[test]
fn max_filesize() {
    // Fixture: files of assorted sizes at the root and inside `a/`, plus an
    // empty directory `a/b`.
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a/b"));
    let sized_files = [
        ("foo", 0),
        ("bar", 400),
        ("baz", 600),
        ("a/foo", 600),
        ("a/bar", 500),
        ("a/baz", 200),
    ];
    for &(rel, size) in &sized_files {
        wfile_size(root.join(rel), size);
    }

    let everything =
        ["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"];
    let mut builder = WalkBuilder::new(root);

    // No size limit configured: every entry is expected.
    assert_paths(root, &builder, &everything);

    // Limit 0: of the files, only the zero-length one is expected; the
    // directories remain in the listing.
    builder.max_filesize(Some(0));
    assert_paths(root, &builder, &["a", "a/b", "foo"]);

    // Limit 500: the 500-byte file is still expected (so the bound is
    // inclusive), while both 600-byte files are not.
    builder.max_filesize(Some(500));
    assert_paths(
        root,
        &builder,
        &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
    );

    // A limit above every file size: every entry is expected again.
    builder.max_filesize(Some(50000));
    assert_paths(root, &builder, &everything);
}
2126
#[cfg(unix)] // because symlinks on windows are weird
#[test]
fn symlinks() {
    // Fixture: `z` is a symlink to the directory `a/b`, which holds `foo`.
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a/b"));
    symlink(root.join("a/b"), root.join("z"));
    wfile(root.join("a/b/foo"), "");

    let mut builder = WalkBuilder::new(root);

    // With default settings the link `z` is expected in the listing, but
    // nothing beneath it.
    assert_paths(root, &builder, &["a", "a/b", "a/b/foo", "z"]);

    // Once links are followed, the linked directory's contents are
    // expected under `z/` as well.
    builder.follow_links(true);
    assert_paths(root, &builder, &["a", "a/b", "a/b/foo", "z", "z/foo"]);
}
2143
#[cfg(unix)] // because symlinks on windows are weird
#[test]
fn first_path_not_symlink() {
    // Fixture: a single plain (non-symlink) directory used as the walk
    // root.
    let td = tmpdir();
    let start = td.path().join("foo");
    mkdirp(&start);

    // Single-threaded walk: exactly one entry (the root itself), and it
    // must not be reported as a symlink.
    let dents = WalkBuilder::new(&start)
        .build()
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(1, dents.len());
    assert!(!dents[0].path_is_symlink());

    // The parallel walker must agree.
    let parallel_builder = WalkBuilder::new(&start);
    let dents = walk_collect_entries_parallel(&parallel_builder);
    assert_eq!(1, dents.len());
    assert!(!dents[0].path_is_symlink());
}
2164
#[cfg(unix)] // because symlinks on windows are weird
#[test]
fn symlink_loop() {
    // Fixture: `a/b/c` is a symlink pointing back up at `a`, forming a
    // cycle.
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a/b"));
    symlink(root.join("a"), root.join("a/b/c"));

    let mut builder = WalkBuilder::new(root);

    // Without following links, the looping link is expected as an ordinary
    // entry.
    assert_paths(root, &builder, &["a", "a/b", "a/b/c"]);

    // When following links, the looping link is expected to be absent from
    // the listing.
    builder.follow_links(true);
    assert_paths(root, &builder, &["a", "a/b"]);
}
2176
// Exercising the 'same_file_system' option needs an environment with at
// least two file systems, which is awkward to arrange in a test. As a
// heuristic, we rely on /sys usually being its own volume on Linux and
// bail out quietly whenever that assumption does not hold.
#[test]
#[cfg(target_os = "linux")]
fn same_file_system() {
    use super::device_num;

    let sys = "/sys";

    // Skip the test when /sys is missing or is not a directory.
    if !Path::new(sys).is_dir() {
        return;
    }

    // Skip the test when the temporary directory and /sys report the same
    // device number, since there would be no boundary to observe.
    let td = tmpdir();
    let root = td.path();
    let tmp_dev = device_num(root).unwrap();
    let sys_dev = device_num(sys).unwrap();
    if tmp_dev == sys_dev {
        return;
    }

    // Place a symlink to /sys inside a fresh subdirectory.
    let dir = root.join("same_file");
    mkdirp(&dir);
    symlink(sys, dir.join("alink"));

    // Walk with symlink following enabled. If the same_file_system option
    // did not work, descending into /sys would probably hit a permission
    // error. With it working, the link itself is listed and nothing
    // beneath it is.
    let mut builder = WalkBuilder::new(root);
    builder.follow_links(true);
    builder.same_file_system(true);
    assert_paths(root, &builder, &["same_file", "same_file/alink"]);
}
2210
#[cfg(target_os = "linux")]
#[test]
fn no_read_permissions() {
    let dir_path = Path::new("/root");

    // Skip the test when /root is not a directory, or when it is readable
    // (for example, when running as root), since in that case the walk
    // would not run into the unreadable directory this test is about.
    if !dir_path.is_dir() || fs::read_dir(dir_path).is_ok() {
        return;
    }

    // The walk cannot descend into the directory, but the directory itself
    // must still be yielded as an entry.
    let builder = WalkBuilder::new(dir_path);
    let parent = dir_path.parent().unwrap();
    assert_paths(parent, &builder, &["root"]);
}
2229
#[test]
fn filter() {
    // Fixture: two independent subtrees, `a/` and `x/`.
    let td = tmpdir();
    let root = td.path();
    mkdirp(root.join("a/b/c"));
    mkdirp(root.join("x/y"));
    wfile(root.join("a/b/foo"), "");
    wfile(root.join("x/y/foo"), "");

    // Baseline without any entry filter: both subtrees are expected.
    let unfiltered = WalkBuilder::new(root);
    assert_paths(
        root,
        &unfiltered,
        &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
    );

    // Reject every entry whose file name is `a`: the whole `a/` subtree is
    // expected to be absent, not merely the directory entry itself.
    let mut filtered = WalkBuilder::new(root);
    filtered.filter_entry(|dent| dent.file_name() != OsStr::new("a"));
    assert_paths(root, &filtered, &["x", "x/y", "x/y/foo"]);
}
2251}
2252