1/*!
2Crate `walkdir` provides an efficient and cross platform implementation
3of recursive directory traversal. Several options are exposed to control
4iteration, such as whether to follow symbolic links (default off), limit the
5maximum number of simultaneous open file descriptors and the ability to
6efficiently skip descending into directories.
7
8To use this crate, add `walkdir` as a dependency to your project's
9`Cargo.toml`:
10
11```toml
12[dependencies]
13walkdir = "2"
14```
15
16# From the top
17
18The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values
19yielded by the iterator. Finally, the [`Error`] type is a small wrapper around
20[`std::io::Error`] with additional information, such as if a loop was detected
21while following symbolic links (not enabled by default).
22
23[`WalkDir`]: struct.WalkDir.html
24[`DirEntry`]: struct.DirEntry.html
25[`Error`]: struct.Error.html
26[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
27
28# Example
29
30The following code recursively iterates over the directory given and prints
31the path for each entry:
32
33```no_run
34use walkdir::WalkDir;
35# use walkdir::Error;
36
37# fn try_main() -> Result<(), Error> {
38for entry in WalkDir::new("foo") {
39 println!("{}", entry?.path().display());
40}
41# Ok(())
42# }
43```
44
45Or, if you'd like to iterate over all entries and ignore any errors that
46may arise, use [`filter_map`]. (e.g., This code below will silently skip
47directories that the owner of the running process does not have permission to
48access.)
49
50```no_run
51use walkdir::WalkDir;
52
53for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
54 println!("{}", entry.path().display());
55}
56```
57
58[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map
59
60# Example: follow symbolic links
61
62The same code as above, except [`follow_links`] is enabled:
63
64```no_run
65use walkdir::WalkDir;
66# use walkdir::Error;
67
68# fn try_main() -> Result<(), Error> {
69for entry in WalkDir::new("foo").follow_links(true) {
70 println!("{}", entry?.path().display());
71}
72# Ok(())
73# }
74```
75
76[`follow_links`]: struct.WalkDir.html#method.follow_links
77
78# Example: skip hidden files and directories on unix
79
80This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files
81and directories efficiently (i.e. without recursing into hidden directories):
82
83```no_run
84use walkdir::{DirEntry, WalkDir};
85# use walkdir::Error;
86
87fn is_hidden(entry: &DirEntry) -> bool {
88 entry.file_name()
89 .to_str()
90 .map(|s| s.starts_with("."))
91 .unwrap_or(false)
92}
93
94# fn try_main() -> Result<(), Error> {
95let walker = WalkDir::new("foo").into_iter();
96for entry in walker.filter_entry(|e| !is_hidden(e)) {
97 println!("{}", entry?.path().display());
98}
99# Ok(())
100# }
101```
102
103[`filter_entry`]: struct.IntoIter.html#method.filter_entry
104*/
105
106#![deny(missing_docs)]
107#![allow(unknown_lints)]
108
109#[cfg(doctest)]
110doc_comment::doctest!("../README.md");
111
112use std::cmp::{min, Ordering};
113use std::fmt;
114use std::fs::{self, ReadDir};
115use std::io;
116use std::path::{Path, PathBuf};
117use std::result;
118use std::vec;
119
120use same_file::Handle;
121
122pub use crate::dent::DirEntry;
123#[cfg(unix)]
124pub use crate::dent::DirEntryExt;
125pub use crate::error::Error;
126
127mod dent;
128mod error;
129#[cfg(test)]
130mod tests;
131mod util;
132
133/// Like try, but for iterators that return [`Option<Result<_, _>>`].
134///
135/// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
136macro_rules! itry {
137 ($e:expr) => {
138 match $e {
139 Ok(v) => v,
140 Err(err) => return Some(Err(From::from(err))),
141 }
142 };
143}
144
145/// A result type for walkdir operations.
146///
147/// Note that this result type embeds the error type in this crate. This
148/// is only useful if you care about the additional information provided by
149/// the error (such as the path associated with the error or whether a loop
150/// was dectected). If you want things to Just Work, then you can use
151/// [`io::Result`] instead since the error type in this package will
152/// automatically convert to an [`io::Result`] when using the [`try!`] macro.
153///
154/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html
155/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html
156pub type Result<T> = ::std::result::Result<T, Error>;
157
158/// A builder to create an iterator for recursively walking a directory.
159///
160/// Results are returned in depth first fashion, with directories yielded
161/// before their contents. If [`contents_first`] is true, contents are yielded
162/// before their directories. The order is unspecified but if [`sort_by`] is
163/// given, directory entries are sorted according to this function. Directory
164/// entries `.` and `..` are always omitted.
165///
166/// If an error occurs at any point during iteration, then it is returned in
167/// place of its corresponding directory entry and iteration continues as
168/// normal. If an error occurs while opening a directory for reading, then it
169/// is not descended into (but the error is still yielded by the iterator).
170/// Iteration may be stopped at any time. When the iterator is destroyed, all
171/// resources associated with it are freed.
172///
173/// [`contents_first`]: struct.WalkDir.html#method.contents_first
174/// [`sort_by`]: struct.WalkDir.html#method.sort_by
175///
176/// # Usage
177///
178/// This type implements [`IntoIterator`] so that it may be used as the subject
179/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want
180/// to use iterator adapters such as [`filter_entry`].
181///
182/// Idiomatic use of this type should use method chaining to set desired
183/// options. For example, this only shows entries with a depth of `1`, `2` or
184/// `3` (relative to `foo`):
185///
186/// ```no_run
187/// use walkdir::WalkDir;
188/// # use walkdir::Error;
189///
190/// # fn try_main() -> Result<(), Error> {
191/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) {
192/// println!("{}", entry?.path().display());
193/// }
194/// # Ok(())
195/// # }
196/// ```
197///
198/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html
199/// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter
200/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry
201///
202/// Note that the iterator by default includes the top-most directory. Since
203/// this is the only directory yielded with depth `0`, it is easy to ignore it
204/// with the [`min_depth`] setting:
205///
206/// ```no_run
207/// use walkdir::WalkDir;
208/// # use walkdir::Error;
209///
210/// # fn try_main() -> Result<(), Error> {
211/// for entry in WalkDir::new("foo").min_depth(1) {
212/// println!("{}", entry?.path().display());
213/// }
214/// # Ok(())
215/// # }
216/// ```
217///
218/// [`min_depth`]: struct.WalkDir.html#method.min_depth
219///
220/// This will only return descendents of the `foo` directory and not `foo`
221/// itself.
222///
223/// # Loops
224///
225/// This iterator (like most/all recursive directory iterators) assumes that
226/// no loops can be made with *hard* links on your file system. In particular,
227/// this would require creating a hard link to a directory such that it creates
228/// a loop. On most platforms, this operation is illegal.
229///
230/// Note that when following symbolic/soft links, loops are detected and an
231/// error is reported.
232#[derive(Debug)]
233pub struct WalkDir {
234 opts: WalkDirOptions,
235 root: PathBuf,
236}
237
238struct WalkDirOptions {
239 follow_links: bool,
240 max_open: usize,
241 min_depth: usize,
242 max_depth: usize,
243 sorter: Option<
244 Box<
245 dyn FnMut(&DirEntry, &DirEntry) -> Ordering
246 + Send
247 + Sync
248 + 'static,
249 >,
250 >,
251 contents_first: bool,
252 same_file_system: bool,
253}
254
255impl fmt::Debug for WalkDirOptions {
256 fn fmt(
257 &self,
258 f: &mut fmt::Formatter<'_>,
259 ) -> result::Result<(), fmt::Error> {
260 let sorter_str: &str = if self.sorter.is_some() {
261 // FnMut isn't `Debug`
262 "Some(...)"
263 } else {
264 "None"
265 };
266 f&mut DebugStruct<'_, '_>.debug_struct("WalkDirOptions")
267 .field("follow_links", &self.follow_links)
268 .field("max_open", &self.max_open)
269 .field("min_depth", &self.min_depth)
270 .field("max_depth", &self.max_depth)
271 .field("sorter", &sorter_str)
272 .field("contents_first", &self.contents_first)
273 .field(name:"same_file_system", &self.same_file_system)
274 .finish()
275 }
276}
277
278impl WalkDir {
279 /// Create a builder for a recursive directory iterator starting at the
280 /// file path `root`. If `root` is a directory, then it is the first item
281 /// yielded by the iterator. If `root` is a file, then it is the first
282 /// and only item yielded by the iterator. If `root` is a symlink, then it
283 /// is always followed for the purposes of directory traversal. (A root
284 /// `DirEntry` still obeys its documentation with respect to symlinks and
285 /// the `follow_links` setting.)
286 pub fn new<P: AsRef<Path>>(root: P) -> Self {
287 WalkDir {
288 opts: WalkDirOptions {
289 follow_links: false,
290 max_open: 10,
291 min_depth: 0,
292 max_depth: ::std::usize::MAX,
293 sorter: None,
294 contents_first: false,
295 same_file_system: false,
296 },
297 root: root.as_ref().to_path_buf(),
298 }
299 }
300
301 /// Set the minimum depth of entries yielded by the iterator.
302 ///
303 /// The smallest depth is `0` and always corresponds to the path given
304 /// to the `new` function on this type. Its direct descendents have depth
305 /// `1`, and their descendents have depth `2`, and so on.
306 pub fn min_depth(mut self, depth: usize) -> Self {
307 self.opts.min_depth = depth;
308 if self.opts.min_depth > self.opts.max_depth {
309 self.opts.min_depth = self.opts.max_depth;
310 }
311 self
312 }
313
314 /// Set the maximum depth of entries yield by the iterator.
315 ///
316 /// The smallest depth is `0` and always corresponds to the path given
317 /// to the `new` function on this type. Its direct descendents have depth
318 /// `1`, and their descendents have depth `2`, and so on.
319 ///
320 /// Note that this will not simply filter the entries of the iterator, but
321 /// it will actually avoid descending into directories when the depth is
322 /// exceeded.
323 pub fn max_depth(mut self, depth: usize) -> Self {
324 self.opts.max_depth = depth;
325 if self.opts.max_depth < self.opts.min_depth {
326 self.opts.max_depth = self.opts.min_depth;
327 }
328 self
329 }
330
331 /// Follow symbolic links. By default, this is disabled.
332 ///
333 /// When `yes` is `true`, symbolic links are followed as if they were
334 /// normal directories and files. If a symbolic link is broken or is
335 /// involved in a loop, an error is yielded.
336 ///
337 /// When enabled, the yielded [`DirEntry`] values represent the target of
338 /// the link while the path corresponds to the link. See the [`DirEntry`]
339 /// type for more details.
340 ///
341 /// [`DirEntry`]: struct.DirEntry.html
342 pub fn follow_links(mut self, yes: bool) -> Self {
343 self.opts.follow_links = yes;
344 self
345 }
346
347 /// Set the maximum number of simultaneously open file descriptors used
348 /// by the iterator.
349 ///
350 /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set
351 /// to `1` automatically. If this is not set, then it defaults to some
352 /// reasonably low number.
353 ///
354 /// This setting has no impact on the results yielded by the iterator
355 /// (even when `n` is `1`). Instead, this setting represents a trade off
356 /// between scarce resources (file descriptors) and memory. Namely, when
357 /// the maximum number of file descriptors is reached and a new directory
358 /// needs to be opened to continue iteration, then a previous directory
359 /// handle is closed and has its unyielded entries stored in memory. In
360 /// practice, this is a satisfying trade off because it scales with respect
361 /// to the *depth* of your file tree. Therefore, low values (even `1`) are
362 /// acceptable.
363 ///
364 /// Note that this value does not impact the number of system calls made by
365 /// an exhausted iterator.
366 ///
367 /// # Platform behavior
368 ///
369 /// On Windows, if `follow_links` is enabled, then this limit is not
370 /// respected. In particular, the maximum number of file descriptors opened
371 /// is proportional to the depth of the directory tree traversed.
372 pub fn max_open(mut self, mut n: usize) -> Self {
373 if n == 0 {
374 n = 1;
375 }
376 self.opts.max_open = n;
377 self
378 }
379
380 /// Set a function for sorting directory entries with a comparator
381 /// function.
382 ///
383 /// If a compare function is set, the resulting iterator will return all
384 /// paths in sorted order. The compare function will be called to compare
385 /// entries from the same directory.
386 ///
387 /// ```rust,no_run
388 /// use std::cmp;
389 /// use std::ffi::OsString;
390 /// use walkdir::WalkDir;
391 ///
392 /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name()));
393 /// ```
394 pub fn sort_by<F>(mut self, cmp: F) -> Self
395 where
396 F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,
397 {
398 self.opts.sorter = Some(Box::new(cmp));
399 self
400 }
401
402 /// Set a function for sorting directory entries with a key extraction
403 /// function.
404 ///
405 /// If a compare function is set, the resulting iterator will return all
406 /// paths in sorted order. The compare function will be called to compare
407 /// entries from the same directory.
408 ///
409 /// ```rust,no_run
410 /// use std::cmp;
411 /// use std::ffi::OsString;
412 /// use walkdir::WalkDir;
413 ///
414 /// WalkDir::new("foo").sort_by_key(|a| a.file_name().to_owned());
415 /// ```
416 pub fn sort_by_key<K, F>(self, mut cmp: F) -> Self
417 where
418 F: FnMut(&DirEntry) -> K + Send + Sync + 'static,
419 K: Ord,
420 {
421 self.sort_by(move |a, b| cmp(a).cmp(&cmp(b)))
422 }
423
424 /// Sort directory entries by file name, to ensure a deterministic order.
425 ///
426 /// This is a convenience function for calling `Self::sort_by()`.
427 ///
428 /// ```rust,no_run
429 /// use walkdir::WalkDir;
430 ///
431 /// WalkDir::new("foo").sort_by_file_name();
432 /// ```
433 pub fn sort_by_file_name(self) -> Self {
434 self.sort_by(|a, b| a.file_name().cmp(b.file_name()))
435 }
436
437 /// Yield a directory's contents before the directory itself. By default,
438 /// this is disabled.
439 ///
440 /// When `yes` is `false` (as is the default), the directory is yielded
441 /// before its contents are read. This is useful when, e.g. you want to
442 /// skip processing of some directories.
443 ///
444 /// When `yes` is `true`, the iterator yields the contents of a directory
445 /// before yielding the directory itself. This is useful when, e.g. you
446 /// want to recursively delete a directory.
447 ///
448 /// # Example
449 ///
450 /// Assume the following directory tree:
451 ///
452 /// ```text
453 /// foo/
454 /// abc/
455 /// qrs
456 /// tuv
457 /// def/
458 /// ```
459 ///
460 /// With contents_first disabled (the default), the following code visits
461 /// the directory tree in depth-first order:
462 ///
463 /// ```no_run
464 /// use walkdir::WalkDir;
465 ///
466 /// for entry in WalkDir::new("foo") {
467 /// let entry = entry.unwrap();
468 /// println!("{}", entry.path().display());
469 /// }
470 ///
471 /// // foo
472 /// // foo/abc
473 /// // foo/abc/qrs
474 /// // foo/abc/tuv
475 /// // foo/def
476 /// ```
477 ///
478 /// With contents_first enabled:
479 ///
480 /// ```no_run
481 /// use walkdir::WalkDir;
482 ///
483 /// for entry in WalkDir::new("foo").contents_first(true) {
484 /// let entry = entry.unwrap();
485 /// println!("{}", entry.path().display());
486 /// }
487 ///
488 /// // foo/abc/qrs
489 /// // foo/abc/tuv
490 /// // foo/abc
491 /// // foo/def
492 /// // foo
493 /// ```
494 pub fn contents_first(mut self, yes: bool) -> Self {
495 self.opts.contents_first = yes;
496 self
497 }
498
499 /// Do not cross file system boundaries.
500 ///
501 /// When this option is enabled, directory traversal will not descend into
502 /// directories that are on a different file system from the root path.
503 ///
504 /// Currently, this option is only supported on Unix and Windows. If this
505 /// option is used on an unsupported platform, then directory traversal
506 /// will immediately return an error and will not yield any entries.
507 pub fn same_file_system(mut self, yes: bool) -> Self {
508 self.opts.same_file_system = yes;
509 self
510 }
511}
512
513impl IntoIterator for WalkDir {
514 type Item = Result<DirEntry>;
515 type IntoIter = IntoIter;
516
517 fn into_iter(self) -> IntoIter {
518 IntoIter {
519 opts: self.opts,
520 start: Some(self.root),
521 stack_list: vec![],
522 stack_path: vec![],
523 oldest_opened: 0,
524 depth: 0,
525 deferred_dirs: vec![],
526 root_device: None,
527 }
528 }
529}
530
531/// An iterator for recursively descending into a directory.
532///
533/// A value with this type must be constructed with the [`WalkDir`] type, which
534/// uses a builder pattern to set options such as min/max depth, max open file
535/// descriptors and whether the iterator should follow symbolic links. After
536/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain.
537///
538/// The order of elements yielded by this iterator is unspecified.
539///
540/// [`WalkDir`]: struct.WalkDir.html
541/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
542#[derive(Debug)]
543pub struct IntoIter {
544 /// Options specified in the builder. Depths, max fds, etc.
545 opts: WalkDirOptions,
546 /// The start path.
547 ///
548 /// This is only `Some(...)` at the beginning. After the first iteration,
549 /// this is always `None`.
550 start: Option<PathBuf>,
551 /// A stack of open (up to max fd) or closed handles to directories.
552 /// An open handle is a plain [`fs::ReadDir`] while a closed handle is
553 /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries.
554 ///
555 /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
556 stack_list: Vec<DirList>,
557 /// A stack of file paths.
558 ///
559 /// This is *only* used when [`follow_links`] is enabled. In all other
560 /// cases this stack is empty.
561 ///
562 /// [`follow_links`]: struct.WalkDir.html#method.follow_links
563 stack_path: Vec<Ancestor>,
564 /// An index into `stack_list` that points to the oldest open directory
565 /// handle. If the maximum fd limit is reached and a new directory needs to
566 /// be read, the handle at this index is closed before the new directory is
567 /// opened.
568 oldest_opened: usize,
569 /// The current depth of iteration (the length of the stack at the
570 /// beginning of each iteration).
571 depth: usize,
572 /// A list of DirEntries corresponding to directories, that are
573 /// yielded after their contents has been fully yielded. This is only
574 /// used when `contents_first` is enabled.
575 deferred_dirs: Vec<DirEntry>,
576 /// The device of the root file path when the first call to `next` was
577 /// made.
578 ///
579 /// If the `same_file_system` option isn't enabled, then this is always
580 /// `None`. Conversely, if it is enabled, this is always `Some(...)` after
581 /// handling the root path.
582 root_device: Option<u64>,
583}
584
585/// An ancestor is an item in the directory tree traversed by walkdir, and is
586/// used to check for loops in the tree when traversing symlinks.
587#[derive(Debug)]
588struct Ancestor {
589 /// The path of this ancestor.
590 path: PathBuf,
591 /// An open file to this ancesor. This is only used on Windows where
592 /// opening a file handle appears to be quite expensive, so we choose to
593 /// cache it. This comes at the cost of not respecting the file descriptor
594 /// limit set by the user.
595 #[cfg(windows)]
596 handle: Handle,
597}
598
599impl Ancestor {
600 /// Create a new ancestor from the given directory path.
601 #[cfg(windows)]
602 fn new(dent: &DirEntry) -> io::Result<Ancestor> {
603 let handle = Handle::from_path(dent.path())?;
604 Ok(Ancestor { path: dent.path().to_path_buf(), handle })
605 }
606
607 /// Create a new ancestor from the given directory path.
608 #[cfg(not(windows))]
609 fn new(dent: &DirEntry) -> io::Result<Ancestor> {
610 Ok(Ancestor { path: dent.path().to_path_buf() })
611 }
612
613 /// Returns true if and only if the given open file handle corresponds to
614 /// the same directory as this ancestor.
615 #[cfg(windows)]
616 fn is_same(&self, child: &Handle) -> io::Result<bool> {
617 Ok(child == &self.handle)
618 }
619
620 /// Returns true if and only if the given open file handle corresponds to
621 /// the same directory as this ancestor.
622 #[cfg(not(windows))]
623 fn is_same(&self, child: &Handle) -> io::Result<bool> {
624 Ok(child == &Handle::from_path(&self.path)?)
625 }
626}
627
628/// A sequence of unconsumed directory entries.
629///
630/// This represents the opened or closed state of a directory handle. When
631/// open, future entries are read by iterating over the raw `fs::ReadDir`.
632/// When closed, all future entries are read into memory. Iteration then
633/// proceeds over a [`Vec<fs::DirEntry>`].
634///
635/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
636/// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html
637#[derive(Debug)]
638enum DirList {
639 /// An opened handle.
640 ///
641 /// This includes the depth of the handle itself.
642 ///
643 /// If there was an error with the initial [`fs::read_dir`] call, then it
644 /// is stored here. (We use an [`Option<...>`] to make yielding the error
645 /// exactly once simpler.)
646 ///
647 /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
648 /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
649 Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> },
650 /// A closed handle.
651 ///
652 /// All remaining directory entries are read into memory.
653 Closed(vec::IntoIter<Result<DirEntry>>),
654}
655
656impl Iterator for IntoIter {
657 type Item = Result<DirEntry>;
658 /// Advances the iterator and returns the next value.
659 ///
660 /// # Errors
661 ///
662 /// If the iterator fails to retrieve the next value, this method returns
663 /// an error value. The error will be wrapped in an Option::Some.
664 fn next(&mut self) -> Option<Result<DirEntry>> {
665 if let Some(start) = self.start.take() {
666 if self.opts.same_file_system {
667 let result = util::device_num(&start)
668 .map_err(|e| Error::from_path(0, start.clone(), e));
669 self.root_device = Some(itry!(result));
670 }
671 let dent = itry!(DirEntry::from_path(0, start, false));
672 if let Some(result) = self.handle_entry(dent) {
673 return Some(result);
674 }
675 }
676 while !self.stack_list.is_empty() {
677 self.depth = self.stack_list.len();
678 if let Some(dentry) = self.get_deferred_dir() {
679 return Some(Ok(dentry));
680 }
681 if self.depth > self.opts.max_depth {
682 // If we've exceeded the max depth, pop the current dir
683 // so that we don't descend.
684 self.pop();
685 continue;
686 }
687 // Unwrap is safe here because we've verified above that
688 // `self.stack_list` is not empty
689 let next = self
690 .stack_list
691 .last_mut()
692 .expect("BUG: stack should be non-empty")
693 .next();
694 match next {
695 None => self.pop(),
696 Some(Err(err)) => return Some(Err(err)),
697 Some(Ok(dent)) => {
698 if let Some(result) = self.handle_entry(dent) {
699 return Some(result);
700 }
701 }
702 }
703 }
704 if self.opts.contents_first {
705 self.depth = self.stack_list.len();
706 if let Some(dentry) = self.get_deferred_dir() {
707 return Some(Ok(dentry));
708 }
709 }
710 None
711 }
712}
713
714impl IntoIter {
715 /// Skips the current directory.
716 ///
717 /// This causes the iterator to stop traversing the contents of the least
718 /// recently yielded directory. This means any remaining entries in that
719 /// directory will be skipped (including sub-directories).
720 ///
721 /// Note that the ergonomics of this method are questionable since it
722 /// borrows the iterator mutably. Namely, you must write out the looping
723 /// condition manually. For example, to skip hidden entries efficiently on
724 /// unix systems:
725 ///
726 /// ```no_run
727 /// use walkdir::{DirEntry, WalkDir};
728 ///
729 /// fn is_hidden(entry: &DirEntry) -> bool {
730 /// entry.file_name()
731 /// .to_str()
732 /// .map(|s| s.starts_with("."))
733 /// .unwrap_or(false)
734 /// }
735 ///
736 /// let mut it = WalkDir::new("foo").into_iter();
737 /// loop {
738 /// let entry = match it.next() {
739 /// None => break,
740 /// Some(Err(err)) => panic!("ERROR: {}", err),
741 /// Some(Ok(entry)) => entry,
742 /// };
743 /// if is_hidden(&entry) {
744 /// if entry.file_type().is_dir() {
745 /// it.skip_current_dir();
746 /// }
747 /// continue;
748 /// }
749 /// println!("{}", entry.path().display());
750 /// }
751 /// ```
752 ///
753 /// You may find it more convenient to use the [`filter_entry`] iterator
754 /// adapter. (See its documentation for the same example functionality as
755 /// above.)
756 ///
757 /// [`filter_entry`]: #method.filter_entry
758 pub fn skip_current_dir(&mut self) {
759 if !self.stack_list.is_empty() {
760 self.pop();
761 }
762 }
763
764 /// Yields only entries which satisfy the given predicate and skips
765 /// descending into directories that do not satisfy the given predicate.
766 ///
767 /// The predicate is applied to all entries. If the predicate is
768 /// true, iteration carries on as normal. If the predicate is false, the
769 /// entry is ignored and if it is a directory, it is not descended into.
770 ///
771 /// This is often more convenient to use than [`skip_current_dir`]. For
772 /// example, to skip hidden files and directories efficiently on unix
773 /// systems:
774 ///
775 /// ```no_run
776 /// use walkdir::{DirEntry, WalkDir};
777 /// # use walkdir::Error;
778 ///
779 /// fn is_hidden(entry: &DirEntry) -> bool {
780 /// entry.file_name()
781 /// .to_str()
782 /// .map(|s| s.starts_with("."))
783 /// .unwrap_or(false)
784 /// }
785 ///
786 /// # fn try_main() -> Result<(), Error> {
787 /// for entry in WalkDir::new("foo")
788 /// .into_iter()
789 /// .filter_entry(|e| !is_hidden(e)) {
790 /// println!("{}", entry?.path().display());
791 /// }
792 /// # Ok(())
793 /// # }
794 /// ```
795 ///
796 /// Note that the iterator will still yield errors for reading entries that
797 /// may not satisfy the predicate.
798 ///
799 /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
800 /// passed to this predicate.
801 ///
802 /// Note that if the iterator has `contents_first` enabled, then this
803 /// method is no different than calling the standard `Iterator::filter`
804 /// method (because directory entries are yielded after they've been
805 /// descended into).
806 ///
807 /// [`skip_current_dir`]: #method.skip_current_dir
808 /// [`min_depth`]: struct.WalkDir.html#method.min_depth
809 /// [`max_depth`]: struct.WalkDir.html#method.max_depth
810 pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P>
811 where
812 P: FnMut(&DirEntry) -> bool,
813 {
814 FilterEntry { it: self, predicate }
815 }
816
817 fn handle_entry(
818 &mut self,
819 mut dent: DirEntry,
820 ) -> Option<Result<DirEntry>> {
821 if self.opts.follow_links && dent.file_type().is_symlink() {
822 dent = itry!(self.follow(dent));
823 }
824 let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir();
825 if is_normal_dir {
826 if self.opts.same_file_system && dent.depth() > 0 {
827 if itry!(self.is_same_file_system(&dent)) {
828 itry!(self.push(&dent));
829 }
830 } else {
831 itry!(self.push(&dent));
832 }
833 } else if dent.depth() == 0 && dent.file_type().is_symlink() {
834 // As a special case, if we are processing a root entry, then we
835 // always follow it even if it's a symlink and follow_links is
836 // false. We are careful to not let this change the semantics of
837 // the DirEntry however. Namely, the DirEntry should still respect
838 // the follow_links setting. When it's disabled, it should report
839 // itself as a symlink. When it's enabled, it should always report
840 // itself as the target.
841 let md = itry!(fs::metadata(dent.path()).map_err(|err| {
842 Error::from_path(dent.depth(), dent.path().to_path_buf(), err)
843 }));
844 if md.file_type().is_dir() {
845 itry!(self.push(&dent));
846 }
847 }
848 if is_normal_dir && self.opts.contents_first {
849 self.deferred_dirs.push(dent);
850 None
851 } else if self.skippable() {
852 None
853 } else {
854 Some(Ok(dent))
855 }
856 }
857
858 fn get_deferred_dir(&mut self) -> Option<DirEntry> {
859 if self.opts.contents_first {
860 if self.depth < self.deferred_dirs.len() {
861 // Unwrap is safe here because we've guaranteed that
862 // `self.deferred_dirs.len()` can never be less than 1
863 let deferred: DirEntry = self
864 .deferred_dirs
865 .pop()
866 .expect("BUG: deferred_dirs should be non-empty");
867 if !self.skippable() {
868 return Some(deferred);
869 }
870 }
871 }
872 None
873 }
874
875 fn push(&mut self, dent: &DirEntry) -> Result<()> {
876 // Make room for another open file descriptor if we've hit the max.
877 let free =
878 self.stack_list.len().checked_sub(self.oldest_opened).unwrap();
879 if free == self.opts.max_open {
880 self.stack_list[self.oldest_opened].close();
881 }
882 // Open a handle to reading the directory's entries.
883 let rd = fs::read_dir(dent.path()).map_err(|err| {
884 Some(Error::from_path(self.depth, dent.path().to_path_buf(), err))
885 });
886 let mut list = DirList::Opened { depth: self.depth, it: rd };
887 if let Some(ref mut cmp) = self.opts.sorter {
888 let mut entries: Vec<_> = list.collect();
889 entries.sort_by(|a, b| match (a, b) {
890 (&Ok(ref a), &Ok(ref b)) => cmp(a, b),
891 (&Err(_), &Err(_)) => Ordering::Equal,
892 (&Ok(_), &Err(_)) => Ordering::Greater,
893 (&Err(_), &Ok(_)) => Ordering::Less,
894 });
895 list = DirList::Closed(entries.into_iter());
896 }
897 if self.opts.follow_links {
898 let ancestor = Ancestor::new(&dent)
899 .map_err(|err| Error::from_io(self.depth, err))?;
900 self.stack_path.push(ancestor);
901 }
902 // We push this after stack_path since creating the Ancestor can fail.
903 // If it fails, then we return the error and won't descend.
904 self.stack_list.push(list);
905 // If we had to close out a previous directory stream, then we need to
906 // increment our index the oldest still-open stream. We do this only
907 // after adding to our stack, in order to ensure that the oldest_opened
908 // index remains valid. The worst that can happen is that an already
909 // closed stream will be closed again, which is a no-op.
910 //
911 // We could move the close of the stream above into this if-body, but
912 // then we would have more than the maximum number of file descriptors
913 // open at a particular point in time.
914 if free == self.opts.max_open {
915 // Unwrap is safe here because self.oldest_opened is guaranteed to
916 // never be greater than `self.stack_list.len()`, which implies
917 // that the subtraction won't underflow and that adding 1 will
918 // never overflow.
919 self.oldest_opened = self.oldest_opened.checked_add(1).unwrap();
920 }
921 Ok(())
922 }
923
924 fn pop(&mut self) {
925 self.stack_list.pop().expect("BUG: cannot pop from empty stack");
926 if self.opts.follow_links {
927 self.stack_path.pop().expect("BUG: list/path stacks out of sync");
928 }
929 // If everything in the stack is already closed, then there is
930 // room for at least one more open descriptor and it will
931 // always be at the top of the stack.
932 self.oldest_opened = min(self.oldest_opened, self.stack_list.len());
933 }
934
935 fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> {
936 dent =
937 DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?;
938 // The only way a symlink can cause a loop is if it points
939 // to a directory. Otherwise, it always points to a leaf
940 // and we can omit any loop checks.
941 if dent.is_dir() {
942 self.check_loop(dent.path())?;
943 }
944 Ok(dent)
945 }
946
947 fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
948 let hchild = Handle::from_path(&child)
949 .map_err(|err| Error::from_io(self.depth, err))?;
950 for ancestor in self.stack_path.iter().rev() {
951 let is_same = ancestor
952 .is_same(&hchild)
953 .map_err(|err| Error::from_io(self.depth, err))?;
954 if is_same {
955 return Err(Error::from_loop(
956 self.depth,
957 &ancestor.path,
958 child.as_ref(),
959 ));
960 }
961 }
962 Ok(())
963 }
964
965 fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> {
966 let dent_device = util::device_num(dent.path())
967 .map_err(|err| Error::from_entry(dent, err))?;
968 Ok(self
969 .root_device
970 .map(|d| d == dent_device)
971 .expect("BUG: called is_same_file_system without root device"))
972 }
973
974 fn skippable(&self) -> bool {
975 self.depth < self.opts.min_depth || self.depth > self.opts.max_depth
976 }
977}
978
979impl DirList {
980 fn close(&mut self) {
981 if let DirList::Opened { .. } = *self {
982 *self = DirList::Closed(self.collect::<Vec<_>>().into_iter());
983 }
984 }
985}
986
987impl Iterator for DirList {
988 type Item = Result<DirEntry>;
989
990 #[inline(always)]
991 fn next(&mut self) -> Option<Result<DirEntry>> {
992 match *self {
993 DirList::Closed(ref mut it: &mut IntoIter>) => it.next(),
994 DirList::Opened { depth: usize, ref mut it: &mut Result> } => match *it {
995 Err(ref mut err: &mut Option) => err.take().map(Err),
996 Ok(ref mut rd: &mut ReadDir) => rd.next().map(|r: Result| match r {
997 Ok(r: DirEntry) => DirEntry::from_entry(depth:depth + 1, &r),
998 Err(err: Error) => Err(Error::from_io(depth:depth + 1, err)),
999 }),
1000 },
1001 }
1002 }
1003}
1004
1005/// A recursive directory iterator that skips entries.
1006///
1007/// Values of this type are created by calling [`.filter_entry()`] on an
1008/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`.
1009///
1010/// Directories that fail the predicate `P` are skipped. Namely, they are
1011/// never yielded and never descended into.
1012///
1013/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options
1014/// are not passed through this filter.
1015///
1016/// If opening a handle to a directory resulted in an error, then it is yielded
1017/// and no corresponding call to the predicate is made.
1018///
1019/// Type parameter `I` refers to the underlying iterator and `P` refers to the
1020/// predicate, which is usually `FnMut(&DirEntry) -> bool`.
1021///
1022/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry
1023/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
1024/// [`min_depth`]: struct.WalkDir.html#method.min_depth
1025/// [`max_depth`]: struct.WalkDir.html#method.max_depth
1026#[derive(Debug)]
1027pub struct FilterEntry<I, P> {
1028 it: I,
1029 predicate: P,
1030}
1031
1032impl<P> Iterator for FilterEntry<IntoIter, P>
1033where
1034 P: FnMut(&DirEntry) -> bool,
1035{
1036 type Item = Result<DirEntry>;
1037
1038 /// Advances the iterator and returns the next value.
1039 ///
1040 /// # Errors
1041 ///
1042 /// If the iterator fails to retrieve the next value, this method returns
1043 /// an error value. The error will be wrapped in an `Option::Some`.
1044 fn next(&mut self) -> Option<Result<DirEntry>> {
1045 loop {
1046 let dent = match self.it.next() {
1047 None => return None,
1048 Some(result) => itry!(result),
1049 };
1050 if !(self.predicate)(&dent) {
1051 if dent.is_dir() {
1052 self.it.skip_current_dir();
1053 }
1054 continue;
1055 }
1056 return Some(Ok(dent));
1057 }
1058 }
1059}
1060
1061impl<P> FilterEntry<IntoIter, P>
1062where
1063 P: FnMut(&DirEntry) -> bool,
1064{
1065 /// Yields only entries which satisfy the given predicate and skips
1066 /// descending into directories that do not satisfy the given predicate.
1067 ///
1068 /// The predicate is applied to all entries. If the predicate is
1069 /// true, iteration carries on as normal. If the predicate is false, the
1070 /// entry is ignored and if it is a directory, it is not descended into.
1071 ///
1072 /// This is often more convenient to use than [`skip_current_dir`]. For
1073 /// example, to skip hidden files and directories efficiently on unix
1074 /// systems:
1075 ///
1076 /// ```no_run
1077 /// use walkdir::{DirEntry, WalkDir};
1078 /// # use walkdir::Error;
1079 ///
1080 /// fn is_hidden(entry: &DirEntry) -> bool {
1081 /// entry.file_name()
1082 /// .to_str()
1083 /// .map(|s| s.starts_with("."))
1084 /// .unwrap_or(false)
1085 /// }
1086 ///
1087 /// # fn try_main() -> Result<(), Error> {
1088 /// for entry in WalkDir::new("foo")
1089 /// .into_iter()
1090 /// .filter_entry(|e| !is_hidden(e)) {
1091 /// println!("{}", entry?.path().display());
1092 /// }
1093 /// # Ok(())
1094 /// # }
1095 /// ```
1096 ///
1097 /// Note that the iterator will still yield errors for reading entries that
1098 /// may not satisfy the predicate.
1099 ///
1100 /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
1101 /// passed to this predicate.
1102 ///
1103 /// Note that if the iterator has `contents_first` enabled, then this
1104 /// method is no different than calling the standard `Iterator::filter`
1105 /// method (because directory entries are yielded after they've been
1106 /// descended into).
1107 ///
1108 /// [`skip_current_dir`]: #method.skip_current_dir
1109 /// [`min_depth`]: struct.WalkDir.html#method.min_depth
1110 /// [`max_depth`]: struct.WalkDir.html#method.max_depth
1111 pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> {
1112 FilterEntry { it: self, predicate }
1113 }
1114
1115 /// Skips the current directory.
1116 ///
1117 /// This causes the iterator to stop traversing the contents of the least
1118 /// recently yielded directory. This means any remaining entries in that
1119 /// directory will be skipped (including sub-directories).
1120 ///
1121 /// Note that the ergonomics of this method are questionable since it
1122 /// borrows the iterator mutably. Namely, you must write out the looping
1123 /// condition manually. For example, to skip hidden entries efficiently on
1124 /// unix systems:
1125 ///
1126 /// ```no_run
1127 /// use walkdir::{DirEntry, WalkDir};
1128 ///
1129 /// fn is_hidden(entry: &DirEntry) -> bool {
1130 /// entry.file_name()
1131 /// .to_str()
1132 /// .map(|s| s.starts_with("."))
1133 /// .unwrap_or(false)
1134 /// }
1135 ///
1136 /// let mut it = WalkDir::new("foo").into_iter();
1137 /// loop {
1138 /// let entry = match it.next() {
1139 /// None => break,
1140 /// Some(Err(err)) => panic!("ERROR: {}", err),
1141 /// Some(Ok(entry)) => entry,
1142 /// };
1143 /// if is_hidden(&entry) {
1144 /// if entry.file_type().is_dir() {
1145 /// it.skip_current_dir();
1146 /// }
1147 /// continue;
1148 /// }
1149 /// println!("{}", entry.path().display());
1150 /// }
1151 /// ```
1152 ///
1153 /// You may find it more convenient to use the [`filter_entry`] iterator
1154 /// adapter. (See its documentation for the same example functionality as
1155 /// above.)
1156 ///
1157 /// [`filter_entry`]: #method.filter_entry
1158 pub fn skip_current_dir(&mut self) {
1159 self.it.skip_current_dir();
1160 }
1161}
1162