1/*!
2Crate `walkdir` provides an efficient and cross platform implementation
3of recursive directory traversal. Several options are exposed to control
4iteration, such as whether to follow symbolic links (default off), limit the
5maximum number of simultaneous open file descriptors and the ability to
6efficiently skip descending into directories.
7
8To use this crate, add `walkdir` as a dependency to your project's
9`Cargo.toml`:
10
11```toml
12[dependencies]
13walkdir = "2"
14```
15
16# From the top
17
18The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values
19yielded by the iterator. Finally, the [`Error`] type is a small wrapper around
20[`std::io::Error`] with additional information, such as if a loop was detected
21while following symbolic links (not enabled by default).
22
23[`WalkDir`]: struct.WalkDir.html
24[`DirEntry`]: struct.DirEntry.html
25[`Error`]: struct.Error.html
26[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
27
28# Example
29
30The following code recursively iterates over the directory given and prints
31the path for each entry:
32
33```no_run
34use walkdir::WalkDir;
35# use walkdir::Error;
36
37# fn try_main() -> Result<(), Error> {
38for entry in WalkDir::new("foo") {
39 println!("{}", entry?.path().display());
40}
41# Ok(())
42# }
43```
44
45Or, if you'd like to iterate over all entries and ignore any errors that
46may arise, use [`filter_map`]. (e.g., This code below will silently skip
47directories that the owner of the running process does not have permission to
48access.)
49
50```no_run
51use walkdir::WalkDir;
52
53for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
54 println!("{}", entry.path().display());
55}
56```
57
58[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map
59
60# Example: follow symbolic links
61
62The same code as above, except [`follow_links`] is enabled:
63
64```no_run
65use walkdir::WalkDir;
66# use walkdir::Error;
67
68# fn try_main() -> Result<(), Error> {
69for entry in WalkDir::new("foo").follow_links(true) {
70 println!("{}", entry?.path().display());
71}
72# Ok(())
73# }
74```
75
76[`follow_links`]: struct.WalkDir.html#method.follow_links
77
78# Example: skip hidden files and directories on unix
79
80This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files
81and directories efficiently (i.e. without recursing into hidden directories):
82
83```no_run
84use walkdir::{DirEntry, WalkDir};
85# use walkdir::Error;
86
87fn is_hidden(entry: &DirEntry) -> bool {
88 entry.file_name()
89 .to_str()
90 .map(|s| s.starts_with("."))
91 .unwrap_or(false)
92}
93
94# fn try_main() -> Result<(), Error> {
95let walker = WalkDir::new("foo").into_iter();
96for entry in walker.filter_entry(|e| !is_hidden(e)) {
97 println!("{}", entry?.path().display());
98}
99# Ok(())
100# }
101```
102
103[`filter_entry`]: struct.IntoIter.html#method.filter_entry
104*/
105
106#![deny(missing_docs)]
107#![allow(unknown_lints)]
108
109#[cfg(doctest)]
110doc_comment::doctest!("../README.md");
111
112use std::cmp::{min, Ordering};
113use std::fmt;
114use std::fs::{self, ReadDir};
115use std::io;
116use std::path::{Path, PathBuf};
117use std::result;
118use std::vec;
119
120use same_file::Handle;
121
122pub use crate::dent::DirEntry;
123#[cfg(unix)]
124pub use crate::dent::DirEntryExt;
125pub use crate::error::Error;
126
127mod dent;
128mod error;
129#[cfg(test)]
130mod tests;
131mod util;
132
133/// Like try, but for iterators that return [`Option<Result<_, _>>`].
134///
135/// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
136macro_rules! itry {
137 ($e:expr) => {
138 match $e {
139 Ok(v) => v,
140 Err(err) => return Some(Err(From::from(err))),
141 }
142 };
143}
144
145/// A result type for walkdir operations.
146///
147/// Note that this result type embeds the error type in this crate. This
148/// is only useful if you care about the additional information provided by
149/// the error (such as the path associated with the error or whether a loop
150/// was dectected). If you want things to Just Work, then you can use
151/// [`io::Result`] instead since the error type in this package will
152/// automatically convert to an [`io::Result`] when using the [`try!`] macro.
153///
154/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html
155/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html
156pub type Result<T> = ::std::result::Result<T, Error>;
157
158/// A builder to create an iterator for recursively walking a directory.
159///
160/// Results are returned in depth first fashion, with directories yielded
161/// before their contents. If [`contents_first`] is true, contents are yielded
162/// before their directories. The order is unspecified but if [`sort_by`] is
163/// given, directory entries are sorted according to this function. Directory
164/// entries `.` and `..` are always omitted.
165///
166/// If an error occurs at any point during iteration, then it is returned in
167/// place of its corresponding directory entry and iteration continues as
168/// normal. If an error occurs while opening a directory for reading, then it
169/// is not descended into (but the error is still yielded by the iterator).
170/// Iteration may be stopped at any time. When the iterator is destroyed, all
171/// resources associated with it are freed.
172///
173/// [`contents_first`]: struct.WalkDir.html#method.contents_first
174/// [`sort_by`]: struct.WalkDir.html#method.sort_by
175///
176/// # Usage
177///
178/// This type implements [`IntoIterator`] so that it may be used as the subject
179/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want
180/// to use iterator adapters such as [`filter_entry`].
181///
182/// Idiomatic use of this type should use method chaining to set desired
183/// options. For example, this only shows entries with a depth of `1`, `2` or
184/// `3` (relative to `foo`):
185///
186/// ```no_run
187/// use walkdir::WalkDir;
188/// # use walkdir::Error;
189///
190/// # fn try_main() -> Result<(), Error> {
191/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) {
192/// println!("{}", entry?.path().display());
193/// }
194/// # Ok(())
195/// # }
196/// ```
197///
198/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html
199/// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter
200/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry
201///
202/// Note that the iterator by default includes the top-most directory. Since
203/// this is the only directory yielded with depth `0`, it is easy to ignore it
204/// with the [`min_depth`] setting:
205///
206/// ```no_run
207/// use walkdir::WalkDir;
208/// # use walkdir::Error;
209///
210/// # fn try_main() -> Result<(), Error> {
211/// for entry in WalkDir::new("foo").min_depth(1) {
212/// println!("{}", entry?.path().display());
213/// }
214/// # Ok(())
215/// # }
216/// ```
217///
218/// [`min_depth`]: struct.WalkDir.html#method.min_depth
219///
220/// This will only return descendents of the `foo` directory and not `foo`
221/// itself.
222///
223/// # Loops
224///
225/// This iterator (like most/all recursive directory iterators) assumes that
226/// no loops can be made with *hard* links on your file system. In particular,
227/// this would require creating a hard link to a directory such that it creates
228/// a loop. On most platforms, this operation is illegal.
229///
230/// Note that when following symbolic/soft links, loops are detected and an
231/// error is reported.
232#[derive(Debug)]
233pub struct WalkDir {
234 opts: WalkDirOptions,
235 root: PathBuf,
236}
237
238struct WalkDirOptions {
239 follow_links: bool,
240 follow_root_links: bool,
241 max_open: usize,
242 min_depth: usize,
243 max_depth: usize,
244 sorter: Option<
245 Box<
246 dyn FnMut(&DirEntry, &DirEntry) -> Ordering
247 + Send
248 + Sync
249 + 'static,
250 >,
251 >,
252 contents_first: bool,
253 same_file_system: bool,
254}
255
256impl fmt::Debug for WalkDirOptions {
257 fn fmt(
258 &self,
259 f: &mut fmt::Formatter<'_>,
260 ) -> result::Result<(), fmt::Error> {
261 let sorter_str = if self.sorter.is_some() {
262 // FnMut isn't `Debug`
263 "Some(...)"
264 } else {
265 "None"
266 };
267 f.debug_struct("WalkDirOptions")
268 .field("follow_links", &self.follow_links)
269 .field("follow_root_link", &self.follow_root_links)
270 .field("max_open", &self.max_open)
271 .field("min_depth", &self.min_depth)
272 .field("max_depth", &self.max_depth)
273 .field("sorter", &sorter_str)
274 .field("contents_first", &self.contents_first)
275 .field("same_file_system", &self.same_file_system)
276 .finish()
277 }
278}
279
280impl WalkDir {
281 /// Create a builder for a recursive directory iterator starting at the
282 /// file path `root`. If `root` is a directory, then it is the first item
283 /// yielded by the iterator. If `root` is a file, then it is the first
284 /// and only item yielded by the iterator. If `root` is a symlink, then it
285 /// is always followed for the purposes of directory traversal. (A root
286 /// `DirEntry` still obeys its documentation with respect to symlinks and
287 /// the `follow_links` setting.)
288 pub fn new<P: AsRef<Path>>(root: P) -> Self {
289 WalkDir {
290 opts: WalkDirOptions {
291 follow_links: false,
292 follow_root_links: true,
293 max_open: 10,
294 min_depth: 0,
295 max_depth: ::std::usize::MAX,
296 sorter: None,
297 contents_first: false,
298 same_file_system: false,
299 },
300 root: root.as_ref().to_path_buf(),
301 }
302 }
303
304 /// Set the minimum depth of entries yielded by the iterator.
305 ///
306 /// The smallest depth is `0` and always corresponds to the path given
307 /// to the `new` function on this type. Its direct descendents have depth
308 /// `1`, and their descendents have depth `2`, and so on.
309 pub fn min_depth(mut self, depth: usize) -> Self {
310 self.opts.min_depth = depth;
311 if self.opts.min_depth > self.opts.max_depth {
312 self.opts.min_depth = self.opts.max_depth;
313 }
314 self
315 }
316
317 /// Set the maximum depth of entries yield by the iterator.
318 ///
319 /// The smallest depth is `0` and always corresponds to the path given
320 /// to the `new` function on this type. Its direct descendents have depth
321 /// `1`, and their descendents have depth `2`, and so on.
322 ///
323 /// Note that this will not simply filter the entries of the iterator, but
324 /// it will actually avoid descending into directories when the depth is
325 /// exceeded.
326 pub fn max_depth(mut self, depth: usize) -> Self {
327 self.opts.max_depth = depth;
328 if self.opts.max_depth < self.opts.min_depth {
329 self.opts.max_depth = self.opts.min_depth;
330 }
331 self
332 }
333
334 /// Follow symbolic links. By default, this is disabled.
335 ///
336 /// When `yes` is `true`, symbolic links are followed as if they were
337 /// normal directories and files. If a symbolic link is broken or is
338 /// involved in a loop, an error is yielded.
339 ///
340 /// When enabled, the yielded [`DirEntry`] values represent the target of
341 /// the link while the path corresponds to the link. See the [`DirEntry`]
342 /// type for more details.
343 ///
344 /// [`DirEntry`]: struct.DirEntry.html
345 pub fn follow_links(mut self, yes: bool) -> Self {
346 self.opts.follow_links = yes;
347 self
348 }
349
350 /// Follow symbolic links if these are the root of the traversal.
351 /// By default, this is enabled.
352 ///
353 /// When `yes` is `true`, symbolic links on root paths are followed
354 /// which is effective if the symbolic link points to a directory.
355 /// If a symbolic link is broken or is involved in a loop, an error is yielded
356 /// as the first entry of the traversal.
357 ///
358 /// When enabled, the yielded [`DirEntry`] values represent the target of
359 /// the link while the path corresponds to the link. See the [`DirEntry`]
360 /// type for more details, and all future entries will be contained within
361 /// the resolved directory behind the symbolic link of the root path.
362 ///
363 /// [`DirEntry`]: struct.DirEntry.html
364 pub fn follow_root_links(mut self, yes: bool) -> Self {
365 self.opts.follow_root_links = yes;
366 self
367 }
368
369 /// Set the maximum number of simultaneously open file descriptors used
370 /// by the iterator.
371 ///
372 /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set
373 /// to `1` automatically. If this is not set, then it defaults to some
374 /// reasonably low number.
375 ///
376 /// This setting has no impact on the results yielded by the iterator
377 /// (even when `n` is `1`). Instead, this setting represents a trade off
378 /// between scarce resources (file descriptors) and memory. Namely, when
379 /// the maximum number of file descriptors is reached and a new directory
380 /// needs to be opened to continue iteration, then a previous directory
381 /// handle is closed and has its unyielded entries stored in memory. In
382 /// practice, this is a satisfying trade off because it scales with respect
383 /// to the *depth* of your file tree. Therefore, low values (even `1`) are
384 /// acceptable.
385 ///
386 /// Note that this value does not impact the number of system calls made by
387 /// an exhausted iterator.
388 ///
389 /// # Platform behavior
390 ///
391 /// On Windows, if `follow_links` is enabled, then this limit is not
392 /// respected. In particular, the maximum number of file descriptors opened
393 /// is proportional to the depth of the directory tree traversed.
394 pub fn max_open(mut self, mut n: usize) -> Self {
395 if n == 0 {
396 n = 1;
397 }
398 self.opts.max_open = n;
399 self
400 }
401
402 /// Set a function for sorting directory entries with a comparator
403 /// function.
404 ///
405 /// If a compare function is set, the resulting iterator will return all
406 /// paths in sorted order. The compare function will be called to compare
407 /// entries from the same directory.
408 ///
409 /// ```rust,no_run
410 /// use std::cmp;
411 /// use std::ffi::OsString;
412 /// use walkdir::WalkDir;
413 ///
414 /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name()));
415 /// ```
416 pub fn sort_by<F>(mut self, cmp: F) -> Self
417 where
418 F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,
419 {
420 self.opts.sorter = Some(Box::new(cmp));
421 self
422 }
423
424 /// Set a function for sorting directory entries with a key extraction
425 /// function.
426 ///
427 /// If a compare function is set, the resulting iterator will return all
428 /// paths in sorted order. The compare function will be called to compare
429 /// entries from the same directory.
430 ///
431 /// ```rust,no_run
432 /// use std::cmp;
433 /// use std::ffi::OsString;
434 /// use walkdir::WalkDir;
435 ///
436 /// WalkDir::new("foo").sort_by_key(|a| a.file_name().to_owned());
437 /// ```
438 pub fn sort_by_key<K, F>(self, mut cmp: F) -> Self
439 where
440 F: FnMut(&DirEntry) -> K + Send + Sync + 'static,
441 K: Ord,
442 {
443 self.sort_by(move |a, b| cmp(a).cmp(&cmp(b)))
444 }
445
446 /// Sort directory entries by file name, to ensure a deterministic order.
447 ///
448 /// This is a convenience function for calling `Self::sort_by()`.
449 ///
450 /// ```rust,no_run
451 /// use walkdir::WalkDir;
452 ///
453 /// WalkDir::new("foo").sort_by_file_name();
454 /// ```
455 pub fn sort_by_file_name(self) -> Self {
456 self.sort_by(|a, b| a.file_name().cmp(b.file_name()))
457 }
458
459 /// Yield a directory's contents before the directory itself. By default,
460 /// this is disabled.
461 ///
462 /// When `yes` is `false` (as is the default), the directory is yielded
463 /// before its contents are read. This is useful when, e.g. you want to
464 /// skip processing of some directories.
465 ///
466 /// When `yes` is `true`, the iterator yields the contents of a directory
467 /// before yielding the directory itself. This is useful when, e.g. you
468 /// want to recursively delete a directory.
469 ///
470 /// # Example
471 ///
472 /// Assume the following directory tree:
473 ///
474 /// ```text
475 /// foo/
476 /// abc/
477 /// qrs
478 /// tuv
479 /// def/
480 /// ```
481 ///
482 /// With contents_first disabled (the default), the following code visits
483 /// the directory tree in depth-first order:
484 ///
485 /// ```no_run
486 /// use walkdir::WalkDir;
487 ///
488 /// for entry in WalkDir::new("foo") {
489 /// let entry = entry.unwrap();
490 /// println!("{}", entry.path().display());
491 /// }
492 ///
493 /// // foo
494 /// // foo/abc
495 /// // foo/abc/qrs
496 /// // foo/abc/tuv
497 /// // foo/def
498 /// ```
499 ///
500 /// With contents_first enabled:
501 ///
502 /// ```no_run
503 /// use walkdir::WalkDir;
504 ///
505 /// for entry in WalkDir::new("foo").contents_first(true) {
506 /// let entry = entry.unwrap();
507 /// println!("{}", entry.path().display());
508 /// }
509 ///
510 /// // foo/abc/qrs
511 /// // foo/abc/tuv
512 /// // foo/abc
513 /// // foo/def
514 /// // foo
515 /// ```
516 pub fn contents_first(mut self, yes: bool) -> Self {
517 self.opts.contents_first = yes;
518 self
519 }
520
521 /// Do not cross file system boundaries.
522 ///
523 /// When this option is enabled, directory traversal will not descend into
524 /// directories that are on a different file system from the root path.
525 ///
526 /// Currently, this option is only supported on Unix and Windows. If this
527 /// option is used on an unsupported platform, then directory traversal
528 /// will immediately return an error and will not yield any entries.
529 pub fn same_file_system(mut self, yes: bool) -> Self {
530 self.opts.same_file_system = yes;
531 self
532 }
533}
534
535impl IntoIterator for WalkDir {
536 type Item = Result<DirEntry>;
537 type IntoIter = IntoIter;
538
539 fn into_iter(self) -> IntoIter {
540 IntoIter {
541 opts: self.opts,
542 start: Some(self.root),
543 stack_list: vec![],
544 stack_path: vec![],
545 oldest_opened: 0,
546 depth: 0,
547 deferred_dirs: vec![],
548 root_device: None,
549 }
550 }
551}
552
553/// An iterator for recursively descending into a directory.
554///
555/// A value with this type must be constructed with the [`WalkDir`] type, which
556/// uses a builder pattern to set options such as min/max depth, max open file
557/// descriptors and whether the iterator should follow symbolic links. After
558/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain.
559///
560/// The order of elements yielded by this iterator is unspecified.
561///
562/// [`WalkDir`]: struct.WalkDir.html
563/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
564#[derive(Debug)]
565pub struct IntoIter {
566 /// Options specified in the builder. Depths, max fds, etc.
567 opts: WalkDirOptions,
568 /// The start path.
569 ///
570 /// This is only `Some(...)` at the beginning. After the first iteration,
571 /// this is always `None`.
572 start: Option<PathBuf>,
573 /// A stack of open (up to max fd) or closed handles to directories.
574 /// An open handle is a plain [`fs::ReadDir`] while a closed handle is
575 /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries.
576 ///
577 /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
578 stack_list: Vec<DirList>,
579 /// A stack of file paths.
580 ///
581 /// This is *only* used when [`follow_links`] is enabled. In all other
582 /// cases this stack is empty.
583 ///
584 /// [`follow_links`]: struct.WalkDir.html#method.follow_links
585 stack_path: Vec<Ancestor>,
586 /// An index into `stack_list` that points to the oldest open directory
587 /// handle. If the maximum fd limit is reached and a new directory needs to
588 /// be read, the handle at this index is closed before the new directory is
589 /// opened.
590 oldest_opened: usize,
591 /// The current depth of iteration (the length of the stack at the
592 /// beginning of each iteration).
593 depth: usize,
594 /// A list of DirEntries corresponding to directories, that are
595 /// yielded after their contents has been fully yielded. This is only
596 /// used when `contents_first` is enabled.
597 deferred_dirs: Vec<DirEntry>,
598 /// The device of the root file path when the first call to `next` was
599 /// made.
600 ///
601 /// If the `same_file_system` option isn't enabled, then this is always
602 /// `None`. Conversely, if it is enabled, this is always `Some(...)` after
603 /// handling the root path.
604 root_device: Option<u64>,
605}
606
607/// An ancestor is an item in the directory tree traversed by walkdir, and is
608/// used to check for loops in the tree when traversing symlinks.
609#[derive(Debug)]
610struct Ancestor {
611 /// The path of this ancestor.
612 path: PathBuf,
613 /// An open file to this ancesor. This is only used on Windows where
614 /// opening a file handle appears to be quite expensive, so we choose to
615 /// cache it. This comes at the cost of not respecting the file descriptor
616 /// limit set by the user.
617 #[cfg(windows)]
618 handle: Handle,
619}
620
621impl Ancestor {
622 /// Create a new ancestor from the given directory path.
623 #[cfg(windows)]
624 fn new(dent: &DirEntry) -> io::Result<Ancestor> {
625 let handle = Handle::from_path(dent.path())?;
626 Ok(Ancestor { path: dent.path().to_path_buf(), handle })
627 }
628
629 /// Create a new ancestor from the given directory path.
630 #[cfg(not(windows))]
631 fn new(dent: &DirEntry) -> io::Result<Ancestor> {
632 Ok(Ancestor { path: dent.path().to_path_buf() })
633 }
634
635 /// Returns true if and only if the given open file handle corresponds to
636 /// the same directory as this ancestor.
637 #[cfg(windows)]
638 fn is_same(&self, child: &Handle) -> io::Result<bool> {
639 Ok(child == &self.handle)
640 }
641
642 /// Returns true if and only if the given open file handle corresponds to
643 /// the same directory as this ancestor.
644 #[cfg(not(windows))]
645 fn is_same(&self, child: &Handle) -> io::Result<bool> {
646 Ok(child == &Handle::from_path(&self.path)?)
647 }
648}
649
650/// A sequence of unconsumed directory entries.
651///
652/// This represents the opened or closed state of a directory handle. When
653/// open, future entries are read by iterating over the raw `fs::ReadDir`.
654/// When closed, all future entries are read into memory. Iteration then
655/// proceeds over a [`Vec<fs::DirEntry>`].
656///
657/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
658/// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html
659#[derive(Debug)]
660enum DirList {
661 /// An opened handle.
662 ///
663 /// This includes the depth of the handle itself.
664 ///
665 /// If there was an error with the initial [`fs::read_dir`] call, then it
666 /// is stored here. (We use an [`Option<...>`] to make yielding the error
667 /// exactly once simpler.)
668 ///
669 /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
670 /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
671 Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> },
672 /// A closed handle.
673 ///
674 /// All remaining directory entries are read into memory.
675 Closed(vec::IntoIter<Result<DirEntry>>),
676}
677
678impl Iterator for IntoIter {
679 type Item = Result<DirEntry>;
680 /// Advances the iterator and returns the next value.
681 ///
682 /// # Errors
683 ///
684 /// If the iterator fails to retrieve the next value, this method returns
685 /// an error value. The error will be wrapped in an Option::Some.
686 fn next(&mut self) -> Option<Result<DirEntry>> {
687 if let Some(start) = self.start.take() {
688 if self.opts.same_file_system {
689 let result = util::device_num(&start)
690 .map_err(|e| Error::from_path(0, start.clone(), e));
691 self.root_device = Some(itry!(result));
692 }
693 let dent = itry!(DirEntry::from_path(0, start, false));
694 if let Some(result) = self.handle_entry(dent) {
695 return Some(result);
696 }
697 }
698 while !self.stack_list.is_empty() {
699 self.depth = self.stack_list.len();
700 if let Some(dentry) = self.get_deferred_dir() {
701 return Some(Ok(dentry));
702 }
703 if self.depth > self.opts.max_depth {
704 // If we've exceeded the max depth, pop the current dir
705 // so that we don't descend.
706 self.pop();
707 continue;
708 }
709 // Unwrap is safe here because we've verified above that
710 // `self.stack_list` is not empty
711 let next = self
712 .stack_list
713 .last_mut()
714 .expect("BUG: stack should be non-empty")
715 .next();
716 match next {
717 None => self.pop(),
718 Some(Err(err)) => return Some(Err(err)),
719 Some(Ok(dent)) => {
720 if let Some(result) = self.handle_entry(dent) {
721 return Some(result);
722 }
723 }
724 }
725 }
726 if self.opts.contents_first {
727 self.depth = self.stack_list.len();
728 if let Some(dentry) = self.get_deferred_dir() {
729 return Some(Ok(dentry));
730 }
731 }
732 None
733 }
734}
735
736impl IntoIter {
737 /// Skips the current directory.
738 ///
739 /// This causes the iterator to stop traversing the contents of the least
740 /// recently yielded directory. This means any remaining entries in that
741 /// directory will be skipped (including sub-directories).
742 ///
743 /// Note that the ergonomics of this method are questionable since it
744 /// borrows the iterator mutably. Namely, you must write out the looping
745 /// condition manually. For example, to skip hidden entries efficiently on
746 /// unix systems:
747 ///
748 /// ```no_run
749 /// use walkdir::{DirEntry, WalkDir};
750 ///
751 /// fn is_hidden(entry: &DirEntry) -> bool {
752 /// entry.file_name()
753 /// .to_str()
754 /// .map(|s| s.starts_with("."))
755 /// .unwrap_or(false)
756 /// }
757 ///
758 /// let mut it = WalkDir::new("foo").into_iter();
759 /// loop {
760 /// let entry = match it.next() {
761 /// None => break,
762 /// Some(Err(err)) => panic!("ERROR: {}", err),
763 /// Some(Ok(entry)) => entry,
764 /// };
765 /// if is_hidden(&entry) {
766 /// if entry.file_type().is_dir() {
767 /// it.skip_current_dir();
768 /// }
769 /// continue;
770 /// }
771 /// println!("{}", entry.path().display());
772 /// }
773 /// ```
774 ///
775 /// You may find it more convenient to use the [`filter_entry`] iterator
776 /// adapter. (See its documentation for the same example functionality as
777 /// above.)
778 ///
779 /// [`filter_entry`]: #method.filter_entry
780 pub fn skip_current_dir(&mut self) {
781 if !self.stack_list.is_empty() {
782 self.pop();
783 }
784 }
785
786 /// Yields only entries which satisfy the given predicate and skips
787 /// descending into directories that do not satisfy the given predicate.
788 ///
789 /// The predicate is applied to all entries. If the predicate is
790 /// true, iteration carries on as normal. If the predicate is false, the
791 /// entry is ignored and if it is a directory, it is not descended into.
792 ///
793 /// This is often more convenient to use than [`skip_current_dir`]. For
794 /// example, to skip hidden files and directories efficiently on unix
795 /// systems:
796 ///
797 /// ```no_run
798 /// use walkdir::{DirEntry, WalkDir};
799 /// # use walkdir::Error;
800 ///
801 /// fn is_hidden(entry: &DirEntry) -> bool {
802 /// entry.file_name()
803 /// .to_str()
804 /// .map(|s| s.starts_with("."))
805 /// .unwrap_or(false)
806 /// }
807 ///
808 /// # fn try_main() -> Result<(), Error> {
809 /// for entry in WalkDir::new("foo")
810 /// .into_iter()
811 /// .filter_entry(|e| !is_hidden(e)) {
812 /// println!("{}", entry?.path().display());
813 /// }
814 /// # Ok(())
815 /// # }
816 /// ```
817 ///
818 /// Note that the iterator will still yield errors for reading entries that
819 /// may not satisfy the predicate.
820 ///
821 /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
822 /// passed to this predicate.
823 ///
824 /// Note that if the iterator has `contents_first` enabled, then this
825 /// method is no different than calling the standard `Iterator::filter`
826 /// method (because directory entries are yielded after they've been
827 /// descended into).
828 ///
829 /// [`skip_current_dir`]: #method.skip_current_dir
830 /// [`min_depth`]: struct.WalkDir.html#method.min_depth
831 /// [`max_depth`]: struct.WalkDir.html#method.max_depth
832 pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P>
833 where
834 P: FnMut(&DirEntry) -> bool,
835 {
836 FilterEntry { it: self, predicate }
837 }
838
839 fn handle_entry(
840 &mut self,
841 mut dent: DirEntry,
842 ) -> Option<Result<DirEntry>> {
843 if self.opts.follow_links && dent.file_type().is_symlink() {
844 dent = itry!(self.follow(dent));
845 }
846 let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir();
847 if is_normal_dir {
848 if self.opts.same_file_system && dent.depth() > 0 {
849 if itry!(self.is_same_file_system(&dent)) {
850 itry!(self.push(&dent));
851 }
852 } else {
853 itry!(self.push(&dent));
854 }
855 } else if dent.depth() == 0
856 && dent.file_type().is_symlink()
857 && self.opts.follow_root_links
858 {
859 // As a special case, if we are processing a root entry, then we
860 // always follow it even if it's a symlink and follow_links is
861 // false. We are careful to not let this change the semantics of
862 // the DirEntry however. Namely, the DirEntry should still respect
863 // the follow_links setting. When it's disabled, it should report
864 // itself as a symlink. When it's enabled, it should always report
865 // itself as the target.
866 let md = itry!(fs::metadata(dent.path()).map_err(|err| {
867 Error::from_path(dent.depth(), dent.path().to_path_buf(), err)
868 }));
869 if md.file_type().is_dir() {
870 itry!(self.push(&dent));
871 }
872 }
873 if is_normal_dir && self.opts.contents_first {
874 self.deferred_dirs.push(dent);
875 None
876 } else if self.skippable() {
877 None
878 } else {
879 Some(Ok(dent))
880 }
881 }
882
883 fn get_deferred_dir(&mut self) -> Option<DirEntry> {
884 if self.opts.contents_first {
885 if self.depth < self.deferred_dirs.len() {
886 // Unwrap is safe here because we've guaranteed that
887 // `self.deferred_dirs.len()` can never be less than 1
888 let deferred: DirEntry = self
889 .deferred_dirs
890 .pop()
891 .expect("BUG: deferred_dirs should be non-empty");
892 if !self.skippable() {
893 return Some(deferred);
894 }
895 }
896 }
897 None
898 }
899
900 fn push(&mut self, dent: &DirEntry) -> Result<()> {
901 // Make room for another open file descriptor if we've hit the max.
902 let free =
903 self.stack_list.len().checked_sub(self.oldest_opened).unwrap();
904 if free == self.opts.max_open {
905 self.stack_list[self.oldest_opened].close();
906 }
907 // Open a handle to reading the directory's entries.
908 let rd = fs::read_dir(dent.path()).map_err(|err| {
909 Some(Error::from_path(self.depth, dent.path().to_path_buf(), err))
910 });
911 let mut list = DirList::Opened { depth: self.depth, it: rd };
912 if let Some(ref mut cmp) = self.opts.sorter {
913 let mut entries: Vec<_> = list.collect();
914 entries.sort_by(|a, b| match (a, b) {
915 (&Ok(ref a), &Ok(ref b)) => cmp(a, b),
916 (&Err(_), &Err(_)) => Ordering::Equal,
917 (&Ok(_), &Err(_)) => Ordering::Greater,
918 (&Err(_), &Ok(_)) => Ordering::Less,
919 });
920 list = DirList::Closed(entries.into_iter());
921 }
922 if self.opts.follow_links {
923 let ancestor = Ancestor::new(&dent)
924 .map_err(|err| Error::from_io(self.depth, err))?;
925 self.stack_path.push(ancestor);
926 }
927 // We push this after stack_path since creating the Ancestor can fail.
928 // If it fails, then we return the error and won't descend.
929 self.stack_list.push(list);
930 // If we had to close out a previous directory stream, then we need to
931 // increment our index the oldest still-open stream. We do this only
932 // after adding to our stack, in order to ensure that the oldest_opened
933 // index remains valid. The worst that can happen is that an already
934 // closed stream will be closed again, which is a no-op.
935 //
936 // We could move the close of the stream above into this if-body, but
937 // then we would have more than the maximum number of file descriptors
938 // open at a particular point in time.
939 if free == self.opts.max_open {
940 // Unwrap is safe here because self.oldest_opened is guaranteed to
941 // never be greater than `self.stack_list.len()`, which implies
942 // that the subtraction won't underflow and that adding 1 will
943 // never overflow.
944 self.oldest_opened = self.oldest_opened.checked_add(1).unwrap();
945 }
946 Ok(())
947 }
948
949 fn pop(&mut self) {
950 self.stack_list.pop().expect("BUG: cannot pop from empty stack");
951 if self.opts.follow_links {
952 self.stack_path.pop().expect("BUG: list/path stacks out of sync");
953 }
954 // If everything in the stack is already closed, then there is
955 // room for at least one more open descriptor and it will
956 // always be at the top of the stack.
957 self.oldest_opened = min(self.oldest_opened, self.stack_list.len());
958 }
959
960 fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> {
961 dent =
962 DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?;
963 // The only way a symlink can cause a loop is if it points
964 // to a directory. Otherwise, it always points to a leaf
965 // and we can omit any loop checks.
966 if dent.is_dir() {
967 self.check_loop(dent.path())?;
968 }
969 Ok(dent)
970 }
971
972 fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
973 let hchild = Handle::from_path(&child)
974 .map_err(|err| Error::from_io(self.depth, err))?;
975 for ancestor in self.stack_path.iter().rev() {
976 let is_same = ancestor
977 .is_same(&hchild)
978 .map_err(|err| Error::from_io(self.depth, err))?;
979 if is_same {
980 return Err(Error::from_loop(
981 self.depth,
982 &ancestor.path,
983 child.as_ref(),
984 ));
985 }
986 }
987 Ok(())
988 }
989
990 fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> {
991 let dent_device = util::device_num(dent.path())
992 .map_err(|err| Error::from_entry(dent, err))?;
993 Ok(self
994 .root_device
995 .map(|d| d == dent_device)
996 .expect("BUG: called is_same_file_system without root device"))
997 }
998
999 fn skippable(&self) -> bool {
1000 self.depth < self.opts.min_depth || self.depth > self.opts.max_depth
1001 }
1002}
1003
1004impl DirList {
1005 fn close(&mut self) {
1006 if let DirList::Opened { .. } = *self {
1007 *self = DirList::Closed(self.collect::<Vec<_>>().into_iter());
1008 }
1009 }
1010}
1011
1012impl Iterator for DirList {
1013 type Item = Result<DirEntry>;
1014
1015 #[inline(always)]
1016 fn next(&mut self) -> Option<Result<DirEntry>> {
1017 match *self {
1018 DirList::Closed(ref mut it) => it.next(),
1019 DirList::Opened { depth, ref mut it } => match *it {
1020 Err(ref mut err) => err.take().map(Err),
1021 Ok(ref mut rd) => rd.next().map(|r| match r {
1022 Ok(r) => DirEntry::from_entry(depth + 1, &r),
1023 Err(err) => Err(Error::from_io(depth + 1, err)),
1024 }),
1025 },
1026 }
1027 }
1028}
1029
1030/// A recursive directory iterator that skips entries.
1031///
1032/// Values of this type are created by calling [`.filter_entry()`] on an
1033/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`.
1034///
1035/// Directories that fail the predicate `P` are skipped. Namely, they are
1036/// never yielded and never descended into.
1037///
1038/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options
1039/// are not passed through this filter.
1040///
1041/// If opening a handle to a directory resulted in an error, then it is yielded
1042/// and no corresponding call to the predicate is made.
1043///
1044/// Type parameter `I` refers to the underlying iterator and `P` refers to the
1045/// predicate, which is usually `FnMut(&DirEntry) -> bool`.
1046///
1047/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry
1048/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
1049/// [`min_depth`]: struct.WalkDir.html#method.min_depth
1050/// [`max_depth`]: struct.WalkDir.html#method.max_depth
1051#[derive(Debug)]
1052pub struct FilterEntry<I, P> {
1053 it: I,
1054 predicate: P,
1055}
1056
1057impl<P> Iterator for FilterEntry<IntoIter, P>
1058where
1059 P: FnMut(&DirEntry) -> bool,
1060{
1061 type Item = Result<DirEntry>;
1062
1063 /// Advances the iterator and returns the next value.
1064 ///
1065 /// # Errors
1066 ///
1067 /// If the iterator fails to retrieve the next value, this method returns
1068 /// an error value. The error will be wrapped in an `Option::Some`.
1069 fn next(&mut self) -> Option<Result<DirEntry>> {
1070 loop {
1071 let dent = match self.it.next() {
1072 None => return None,
1073 Some(result) => itry!(result),
1074 };
1075 if !(self.predicate)(&dent) {
1076 if dent.is_dir() {
1077 self.it.skip_current_dir();
1078 }
1079 continue;
1080 }
1081 return Some(Ok(dent));
1082 }
1083 }
1084}
1085
1086impl<P> FilterEntry<IntoIter, P>
1087where
1088 P: FnMut(&DirEntry) -> bool,
1089{
1090 /// Yields only entries which satisfy the given predicate and skips
1091 /// descending into directories that do not satisfy the given predicate.
1092 ///
1093 /// The predicate is applied to all entries. If the predicate is
1094 /// true, iteration carries on as normal. If the predicate is false, the
1095 /// entry is ignored and if it is a directory, it is not descended into.
1096 ///
1097 /// This is often more convenient to use than [`skip_current_dir`]. For
1098 /// example, to skip hidden files and directories efficiently on unix
1099 /// systems:
1100 ///
1101 /// ```no_run
1102 /// use walkdir::{DirEntry, WalkDir};
1103 /// # use walkdir::Error;
1104 ///
1105 /// fn is_hidden(entry: &DirEntry) -> bool {
1106 /// entry.file_name()
1107 /// .to_str()
1108 /// .map(|s| s.starts_with("."))
1109 /// .unwrap_or(false)
1110 /// }
1111 ///
1112 /// # fn try_main() -> Result<(), Error> {
1113 /// for entry in WalkDir::new("foo")
1114 /// .into_iter()
1115 /// .filter_entry(|e| !is_hidden(e)) {
1116 /// println!("{}", entry?.path().display());
1117 /// }
1118 /// # Ok(())
1119 /// # }
1120 /// ```
1121 ///
1122 /// Note that the iterator will still yield errors for reading entries that
1123 /// may not satisfy the predicate.
1124 ///
1125 /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
1126 /// passed to this predicate.
1127 ///
1128 /// Note that if the iterator has `contents_first` enabled, then this
1129 /// method is no different than calling the standard `Iterator::filter`
1130 /// method (because directory entries are yielded after they've been
1131 /// descended into).
1132 ///
1133 /// [`skip_current_dir`]: #method.skip_current_dir
1134 /// [`min_depth`]: struct.WalkDir.html#method.min_depth
1135 /// [`max_depth`]: struct.WalkDir.html#method.max_depth
1136 pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> {
1137 FilterEntry { it: self, predicate }
1138 }
1139
1140 /// Skips the current directory.
1141 ///
1142 /// This causes the iterator to stop traversing the contents of the least
1143 /// recently yielded directory. This means any remaining entries in that
1144 /// directory will be skipped (including sub-directories).
1145 ///
1146 /// Note that the ergonomics of this method are questionable since it
1147 /// borrows the iterator mutably. Namely, you must write out the looping
1148 /// condition manually. For example, to skip hidden entries efficiently on
1149 /// unix systems:
1150 ///
1151 /// ```no_run
1152 /// use walkdir::{DirEntry, WalkDir};
1153 ///
1154 /// fn is_hidden(entry: &DirEntry) -> bool {
1155 /// entry.file_name()
1156 /// .to_str()
1157 /// .map(|s| s.starts_with("."))
1158 /// .unwrap_or(false)
1159 /// }
1160 ///
1161 /// let mut it = WalkDir::new("foo").into_iter();
1162 /// loop {
1163 /// let entry = match it.next() {
1164 /// None => break,
1165 /// Some(Err(err)) => panic!("ERROR: {}", err),
1166 /// Some(Ok(entry)) => entry,
1167 /// };
1168 /// if is_hidden(&entry) {
1169 /// if entry.file_type().is_dir() {
1170 /// it.skip_current_dir();
1171 /// }
1172 /// continue;
1173 /// }
1174 /// println!("{}", entry.path().display());
1175 /// }
1176 /// ```
1177 ///
1178 /// You may find it more convenient to use the [`filter_entry`] iterator
1179 /// adapter. (See its documentation for the same example functionality as
1180 /// above.)
1181 ///
1182 /// [`filter_entry`]: #method.filter_entry
1183 pub fn skip_current_dir(&mut self) {
1184 self.it.skip_current_dir();
1185 }
1186}
1187