1 | /*! |
2 | Crate `walkdir` provides an efficient and cross platform implementation |
3 | of recursive directory traversal. Several options are exposed to control |
4 | iteration, such as whether to follow symbolic links (default off), limit the |
5 | maximum number of simultaneous open file descriptors and the ability to |
6 | efficiently skip descending into directories. |
7 | |
8 | To use this crate, add `walkdir` as a dependency to your project's |
9 | `Cargo.toml`: |
10 | |
11 | ```toml |
12 | [dependencies] |
13 | walkdir = "2" |
14 | ``` |
15 | |
16 | # From the top |
17 | |
18 | The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values |
19 | yielded by the iterator. Finally, the [`Error`] type is a small wrapper around |
20 | [`std::io::Error`] with additional information, such as if a loop was detected |
21 | while following symbolic links (not enabled by default). |
22 | |
23 | [`WalkDir`]: struct.WalkDir.html |
24 | [`DirEntry`]: struct.DirEntry.html |
25 | [`Error`]: struct.Error.html |
26 | [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html |
27 | |
28 | # Example |
29 | |
30 | The following code recursively iterates over the directory given and prints |
31 | the path for each entry: |
32 | |
33 | ```no_run |
34 | use walkdir::WalkDir; |
35 | # use walkdir::Error; |
36 | |
37 | # fn try_main() -> Result<(), Error> { |
38 | for entry in WalkDir::new("foo" ) { |
39 | println!("{}" , entry?.path().display()); |
40 | } |
41 | # Ok(()) |
42 | # } |
43 | ``` |
44 | |
45 | Or, if you'd like to iterate over all entries and ignore any errors that |
46 | may arise, use [`filter_map`]. (e.g., This code below will silently skip |
47 | directories that the owner of the running process does not have permission to |
48 | access.) |
49 | |
50 | ```no_run |
51 | use walkdir::WalkDir; |
52 | |
53 | for entry in WalkDir::new("foo" ).into_iter().filter_map(|e| e.ok()) { |
54 | println!("{}" , entry.path().display()); |
55 | } |
56 | ``` |
57 | |
58 | [`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map |
59 | |
60 | # Example: follow symbolic links |
61 | |
62 | The same code as above, except [`follow_links`] is enabled: |
63 | |
64 | ```no_run |
65 | use walkdir::WalkDir; |
66 | # use walkdir::Error; |
67 | |
68 | # fn try_main() -> Result<(), Error> { |
69 | for entry in WalkDir::new("foo" ).follow_links(true) { |
70 | println!("{}" , entry?.path().display()); |
71 | } |
72 | # Ok(()) |
73 | # } |
74 | ``` |
75 | |
76 | [`follow_links`]: struct.WalkDir.html#method.follow_links |
77 | |
78 | # Example: skip hidden files and directories on unix |
79 | |
80 | This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files |
81 | and directories efficiently (i.e. without recursing into hidden directories): |
82 | |
83 | ```no_run |
84 | use walkdir::{DirEntry, WalkDir}; |
85 | # use walkdir::Error; |
86 | |
87 | fn is_hidden(entry: &DirEntry) -> bool { |
88 | entry.file_name() |
89 | .to_str() |
90 | .map(|s| s.starts_with("." )) |
91 | .unwrap_or(false) |
92 | } |
93 | |
94 | # fn try_main() -> Result<(), Error> { |
95 | let walker = WalkDir::new("foo" ).into_iter(); |
96 | for entry in walker.filter_entry(|e| !is_hidden(e)) { |
97 | println!("{}" , entry?.path().display()); |
98 | } |
99 | # Ok(()) |
100 | # } |
101 | ``` |
102 | |
103 | [`filter_entry`]: struct.IntoIter.html#method.filter_entry |
104 | */ |
105 | |
106 | #![deny (missing_docs)] |
107 | #![allow (unknown_lints)] |
108 | |
109 | #[cfg (doctest)] |
110 | doc_comment::doctest!("../README.md" ); |
111 | |
112 | use std::cmp::{min, Ordering}; |
113 | use std::fmt; |
114 | use std::fs::{self, ReadDir}; |
115 | use std::io; |
116 | use std::path::{Path, PathBuf}; |
117 | use std::result; |
118 | use std::vec; |
119 | |
120 | use same_file::Handle; |
121 | |
122 | pub use crate::dent::DirEntry; |
123 | #[cfg (unix)] |
124 | pub use crate::dent::DirEntryExt; |
125 | pub use crate::error::Error; |
126 | |
127 | mod dent; |
128 | mod error; |
129 | #[cfg (test)] |
130 | mod tests; |
131 | mod util; |
132 | |
133 | /// Like try, but for iterators that return [`Option<Result<_, _>>`]. |
134 | /// |
135 | /// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html |
136 | macro_rules! itry { |
137 | ($e:expr) => { |
138 | match $e { |
139 | Ok(v) => v, |
140 | Err(err) => return Some(Err(From::from(err))), |
141 | } |
142 | }; |
143 | } |
144 | |
145 | /// A result type for walkdir operations. |
146 | /// |
147 | /// Note that this result type embeds the error type in this crate. This |
148 | /// is only useful if you care about the additional information provided by |
149 | /// the error (such as the path associated with the error or whether a loop |
150 | /// was dectected). If you want things to Just Work, then you can use |
151 | /// [`io::Result`] instead since the error type in this package will |
152 | /// automatically convert to an [`io::Result`] when using the [`try!`] macro. |
153 | /// |
154 | /// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html |
155 | /// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html |
156 | pub type Result<T> = ::std::result::Result<T, Error>; |
157 | |
158 | /// A builder to create an iterator for recursively walking a directory. |
159 | /// |
160 | /// Results are returned in depth first fashion, with directories yielded |
161 | /// before their contents. If [`contents_first`] is true, contents are yielded |
162 | /// before their directories. The order is unspecified but if [`sort_by`] is |
163 | /// given, directory entries are sorted according to this function. Directory |
164 | /// entries `.` and `..` are always omitted. |
165 | /// |
166 | /// If an error occurs at any point during iteration, then it is returned in |
167 | /// place of its corresponding directory entry and iteration continues as |
168 | /// normal. If an error occurs while opening a directory for reading, then it |
169 | /// is not descended into (but the error is still yielded by the iterator). |
170 | /// Iteration may be stopped at any time. When the iterator is destroyed, all |
171 | /// resources associated with it are freed. |
172 | /// |
173 | /// [`contents_first`]: struct.WalkDir.html#method.contents_first |
174 | /// [`sort_by`]: struct.WalkDir.html#method.sort_by |
175 | /// |
176 | /// # Usage |
177 | /// |
178 | /// This type implements [`IntoIterator`] so that it may be used as the subject |
179 | /// of a `for` loop. You may need to call [`into_iter`] explicitly if you want |
180 | /// to use iterator adapters such as [`filter_entry`]. |
181 | /// |
182 | /// Idiomatic use of this type should use method chaining to set desired |
183 | /// options. For example, this only shows entries with a depth of `1`, `2` or |
184 | /// `3` (relative to `foo`): |
185 | /// |
186 | /// ```no_run |
187 | /// use walkdir::WalkDir; |
188 | /// # use walkdir::Error; |
189 | /// |
190 | /// # fn try_main() -> Result<(), Error> { |
191 | /// for entry in WalkDir::new("foo" ).min_depth(1).max_depth(3) { |
192 | /// println!("{}" , entry?.path().display()); |
193 | /// } |
194 | /// # Ok(()) |
195 | /// # } |
196 | /// ``` |
197 | /// |
198 | /// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html |
199 | /// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter |
200 | /// [`filter_entry`]: struct.IntoIter.html#method.filter_entry |
201 | /// |
202 | /// Note that the iterator by default includes the top-most directory. Since |
203 | /// this is the only directory yielded with depth `0`, it is easy to ignore it |
204 | /// with the [`min_depth`] setting: |
205 | /// |
206 | /// ```no_run |
207 | /// use walkdir::WalkDir; |
208 | /// # use walkdir::Error; |
209 | /// |
210 | /// # fn try_main() -> Result<(), Error> { |
211 | /// for entry in WalkDir::new("foo" ).min_depth(1) { |
212 | /// println!("{}" , entry?.path().display()); |
213 | /// } |
214 | /// # Ok(()) |
215 | /// # } |
216 | /// ``` |
217 | /// |
218 | /// [`min_depth`]: struct.WalkDir.html#method.min_depth |
219 | /// |
220 | /// This will only return descendents of the `foo` directory and not `foo` |
221 | /// itself. |
222 | /// |
223 | /// # Loops |
224 | /// |
225 | /// This iterator (like most/all recursive directory iterators) assumes that |
226 | /// no loops can be made with *hard* links on your file system. In particular, |
227 | /// this would require creating a hard link to a directory such that it creates |
228 | /// a loop. On most platforms, this operation is illegal. |
229 | /// |
230 | /// Note that when following symbolic/soft links, loops are detected and an |
231 | /// error is reported. |
232 | #[derive (Debug)] |
233 | pub struct WalkDir { |
234 | opts: WalkDirOptions, |
235 | root: PathBuf, |
236 | } |
237 | |
238 | struct WalkDirOptions { |
239 | follow_links: bool, |
240 | max_open: usize, |
241 | min_depth: usize, |
242 | max_depth: usize, |
243 | sorter: Option< |
244 | Box< |
245 | dyn FnMut(&DirEntry, &DirEntry) -> Ordering |
246 | + Send |
247 | + Sync |
248 | + 'static, |
249 | >, |
250 | >, |
251 | contents_first: bool, |
252 | same_file_system: bool, |
253 | } |
254 | |
255 | impl fmt::Debug for WalkDirOptions { |
256 | fn fmt( |
257 | &self, |
258 | f: &mut fmt::Formatter<'_>, |
259 | ) -> result::Result<(), fmt::Error> { |
260 | let sorter_str: &str = if self.sorter.is_some() { |
261 | // FnMut isn't `Debug` |
262 | "Some(...)" |
263 | } else { |
264 | "None" |
265 | }; |
266 | f&mut DebugStruct<'_, '_>.debug_struct("WalkDirOptions" ) |
267 | .field("follow_links" , &self.follow_links) |
268 | .field("max_open" , &self.max_open) |
269 | .field("min_depth" , &self.min_depth) |
270 | .field("max_depth" , &self.max_depth) |
271 | .field("sorter" , &sorter_str) |
272 | .field("contents_first" , &self.contents_first) |
273 | .field(name:"same_file_system" , &self.same_file_system) |
274 | .finish() |
275 | } |
276 | } |
277 | |
278 | impl WalkDir { |
279 | /// Create a builder for a recursive directory iterator starting at the |
280 | /// file path `root`. If `root` is a directory, then it is the first item |
281 | /// yielded by the iterator. If `root` is a file, then it is the first |
282 | /// and only item yielded by the iterator. If `root` is a symlink, then it |
283 | /// is always followed for the purposes of directory traversal. (A root |
284 | /// `DirEntry` still obeys its documentation with respect to symlinks and |
285 | /// the `follow_links` setting.) |
286 | pub fn new<P: AsRef<Path>>(root: P) -> Self { |
287 | WalkDir { |
288 | opts: WalkDirOptions { |
289 | follow_links: false, |
290 | max_open: 10, |
291 | min_depth: 0, |
292 | max_depth: ::std::usize::MAX, |
293 | sorter: None, |
294 | contents_first: false, |
295 | same_file_system: false, |
296 | }, |
297 | root: root.as_ref().to_path_buf(), |
298 | } |
299 | } |
300 | |
301 | /// Set the minimum depth of entries yielded by the iterator. |
302 | /// |
303 | /// The smallest depth is `0` and always corresponds to the path given |
304 | /// to the `new` function on this type. Its direct descendents have depth |
305 | /// `1`, and their descendents have depth `2`, and so on. |
306 | pub fn min_depth(mut self, depth: usize) -> Self { |
307 | self.opts.min_depth = depth; |
308 | if self.opts.min_depth > self.opts.max_depth { |
309 | self.opts.min_depth = self.opts.max_depth; |
310 | } |
311 | self |
312 | } |
313 | |
314 | /// Set the maximum depth of entries yield by the iterator. |
315 | /// |
316 | /// The smallest depth is `0` and always corresponds to the path given |
317 | /// to the `new` function on this type. Its direct descendents have depth |
318 | /// `1`, and their descendents have depth `2`, and so on. |
319 | /// |
320 | /// Note that this will not simply filter the entries of the iterator, but |
321 | /// it will actually avoid descending into directories when the depth is |
322 | /// exceeded. |
323 | pub fn max_depth(mut self, depth: usize) -> Self { |
324 | self.opts.max_depth = depth; |
325 | if self.opts.max_depth < self.opts.min_depth { |
326 | self.opts.max_depth = self.opts.min_depth; |
327 | } |
328 | self |
329 | } |
330 | |
331 | /// Follow symbolic links. By default, this is disabled. |
332 | /// |
333 | /// When `yes` is `true`, symbolic links are followed as if they were |
334 | /// normal directories and files. If a symbolic link is broken or is |
335 | /// involved in a loop, an error is yielded. |
336 | /// |
337 | /// When enabled, the yielded [`DirEntry`] values represent the target of |
338 | /// the link while the path corresponds to the link. See the [`DirEntry`] |
339 | /// type for more details. |
340 | /// |
341 | /// [`DirEntry`]: struct.DirEntry.html |
342 | pub fn follow_links(mut self, yes: bool) -> Self { |
343 | self.opts.follow_links = yes; |
344 | self |
345 | } |
346 | |
347 | /// Set the maximum number of simultaneously open file descriptors used |
348 | /// by the iterator. |
349 | /// |
350 | /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set |
351 | /// to `1` automatically. If this is not set, then it defaults to some |
352 | /// reasonably low number. |
353 | /// |
354 | /// This setting has no impact on the results yielded by the iterator |
355 | /// (even when `n` is `1`). Instead, this setting represents a trade off |
356 | /// between scarce resources (file descriptors) and memory. Namely, when |
357 | /// the maximum number of file descriptors is reached and a new directory |
358 | /// needs to be opened to continue iteration, then a previous directory |
359 | /// handle is closed and has its unyielded entries stored in memory. In |
360 | /// practice, this is a satisfying trade off because it scales with respect |
361 | /// to the *depth* of your file tree. Therefore, low values (even `1`) are |
362 | /// acceptable. |
363 | /// |
364 | /// Note that this value does not impact the number of system calls made by |
365 | /// an exhausted iterator. |
366 | /// |
367 | /// # Platform behavior |
368 | /// |
369 | /// On Windows, if `follow_links` is enabled, then this limit is not |
370 | /// respected. In particular, the maximum number of file descriptors opened |
371 | /// is proportional to the depth of the directory tree traversed. |
372 | pub fn max_open(mut self, mut n: usize) -> Self { |
373 | if n == 0 { |
374 | n = 1; |
375 | } |
376 | self.opts.max_open = n; |
377 | self |
378 | } |
379 | |
380 | /// Set a function for sorting directory entries with a comparator |
381 | /// function. |
382 | /// |
383 | /// If a compare function is set, the resulting iterator will return all |
384 | /// paths in sorted order. The compare function will be called to compare |
385 | /// entries from the same directory. |
386 | /// |
387 | /// ```rust,no_run |
388 | /// use std::cmp; |
389 | /// use std::ffi::OsString; |
390 | /// use walkdir::WalkDir; |
391 | /// |
392 | /// WalkDir::new("foo" ).sort_by(|a,b| a.file_name().cmp(b.file_name())); |
393 | /// ``` |
394 | pub fn sort_by<F>(mut self, cmp: F) -> Self |
395 | where |
396 | F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static, |
397 | { |
398 | self.opts.sorter = Some(Box::new(cmp)); |
399 | self |
400 | } |
401 | |
402 | /// Set a function for sorting directory entries with a key extraction |
403 | /// function. |
404 | /// |
405 | /// If a compare function is set, the resulting iterator will return all |
406 | /// paths in sorted order. The compare function will be called to compare |
407 | /// entries from the same directory. |
408 | /// |
409 | /// ```rust,no_run |
410 | /// use std::cmp; |
411 | /// use std::ffi::OsString; |
412 | /// use walkdir::WalkDir; |
413 | /// |
414 | /// WalkDir::new("foo" ).sort_by_key(|a| a.file_name().to_owned()); |
415 | /// ``` |
416 | pub fn sort_by_key<K, F>(self, mut cmp: F) -> Self |
417 | where |
418 | F: FnMut(&DirEntry) -> K + Send + Sync + 'static, |
419 | K: Ord, |
420 | { |
421 | self.sort_by(move |a, b| cmp(a).cmp(&cmp(b))) |
422 | } |
423 | |
424 | /// Sort directory entries by file name, to ensure a deterministic order. |
425 | /// |
426 | /// This is a convenience function for calling `Self::sort_by()`. |
427 | /// |
428 | /// ```rust,no_run |
429 | /// use walkdir::WalkDir; |
430 | /// |
431 | /// WalkDir::new("foo" ).sort_by_file_name(); |
432 | /// ``` |
433 | pub fn sort_by_file_name(self) -> Self { |
434 | self.sort_by(|a, b| a.file_name().cmp(b.file_name())) |
435 | } |
436 | |
437 | /// Yield a directory's contents before the directory itself. By default, |
438 | /// this is disabled. |
439 | /// |
440 | /// When `yes` is `false` (as is the default), the directory is yielded |
441 | /// before its contents are read. This is useful when, e.g. you want to |
442 | /// skip processing of some directories. |
443 | /// |
444 | /// When `yes` is `true`, the iterator yields the contents of a directory |
445 | /// before yielding the directory itself. This is useful when, e.g. you |
446 | /// want to recursively delete a directory. |
447 | /// |
448 | /// # Example |
449 | /// |
450 | /// Assume the following directory tree: |
451 | /// |
452 | /// ```text |
453 | /// foo/ |
454 | /// abc/ |
455 | /// qrs |
456 | /// tuv |
457 | /// def/ |
458 | /// ``` |
459 | /// |
460 | /// With contents_first disabled (the default), the following code visits |
461 | /// the directory tree in depth-first order: |
462 | /// |
463 | /// ```no_run |
464 | /// use walkdir::WalkDir; |
465 | /// |
466 | /// for entry in WalkDir::new("foo" ) { |
467 | /// let entry = entry.unwrap(); |
468 | /// println!("{}" , entry.path().display()); |
469 | /// } |
470 | /// |
471 | /// // foo |
472 | /// // foo/abc |
473 | /// // foo/abc/qrs |
474 | /// // foo/abc/tuv |
475 | /// // foo/def |
476 | /// ``` |
477 | /// |
478 | /// With contents_first enabled: |
479 | /// |
480 | /// ```no_run |
481 | /// use walkdir::WalkDir; |
482 | /// |
483 | /// for entry in WalkDir::new("foo" ).contents_first(true) { |
484 | /// let entry = entry.unwrap(); |
485 | /// println!("{}" , entry.path().display()); |
486 | /// } |
487 | /// |
488 | /// // foo/abc/qrs |
489 | /// // foo/abc/tuv |
490 | /// // foo/abc |
491 | /// // foo/def |
492 | /// // foo |
493 | /// ``` |
494 | pub fn contents_first(mut self, yes: bool) -> Self { |
495 | self.opts.contents_first = yes; |
496 | self |
497 | } |
498 | |
499 | /// Do not cross file system boundaries. |
500 | /// |
501 | /// When this option is enabled, directory traversal will not descend into |
502 | /// directories that are on a different file system from the root path. |
503 | /// |
504 | /// Currently, this option is only supported on Unix and Windows. If this |
505 | /// option is used on an unsupported platform, then directory traversal |
506 | /// will immediately return an error and will not yield any entries. |
507 | pub fn same_file_system(mut self, yes: bool) -> Self { |
508 | self.opts.same_file_system = yes; |
509 | self |
510 | } |
511 | } |
512 | |
513 | impl IntoIterator for WalkDir { |
514 | type Item = Result<DirEntry>; |
515 | type IntoIter = IntoIter; |
516 | |
517 | fn into_iter(self) -> IntoIter { |
518 | IntoIter { |
519 | opts: self.opts, |
520 | start: Some(self.root), |
521 | stack_list: vec![], |
522 | stack_path: vec![], |
523 | oldest_opened: 0, |
524 | depth: 0, |
525 | deferred_dirs: vec![], |
526 | root_device: None, |
527 | } |
528 | } |
529 | } |
530 | |
531 | /// An iterator for recursively descending into a directory. |
532 | /// |
533 | /// A value with this type must be constructed with the [`WalkDir`] type, which |
534 | /// uses a builder pattern to set options such as min/max depth, max open file |
535 | /// descriptors and whether the iterator should follow symbolic links. After |
536 | /// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain. |
537 | /// |
538 | /// The order of elements yielded by this iterator is unspecified. |
539 | /// |
540 | /// [`WalkDir`]: struct.WalkDir.html |
541 | /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v |
542 | #[derive (Debug)] |
543 | pub struct IntoIter { |
544 | /// Options specified in the builder. Depths, max fds, etc. |
545 | opts: WalkDirOptions, |
546 | /// The start path. |
547 | /// |
548 | /// This is only `Some(...)` at the beginning. After the first iteration, |
549 | /// this is always `None`. |
550 | start: Option<PathBuf>, |
551 | /// A stack of open (up to max fd) or closed handles to directories. |
552 | /// An open handle is a plain [`fs::ReadDir`] while a closed handle is |
553 | /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries. |
554 | /// |
555 | /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html |
556 | stack_list: Vec<DirList>, |
557 | /// A stack of file paths. |
558 | /// |
559 | /// This is *only* used when [`follow_links`] is enabled. In all other |
560 | /// cases this stack is empty. |
561 | /// |
562 | /// [`follow_links`]: struct.WalkDir.html#method.follow_links |
563 | stack_path: Vec<Ancestor>, |
564 | /// An index into `stack_list` that points to the oldest open directory |
565 | /// handle. If the maximum fd limit is reached and a new directory needs to |
566 | /// be read, the handle at this index is closed before the new directory is |
567 | /// opened. |
568 | oldest_opened: usize, |
569 | /// The current depth of iteration (the length of the stack at the |
570 | /// beginning of each iteration). |
571 | depth: usize, |
572 | /// A list of DirEntries corresponding to directories, that are |
573 | /// yielded after their contents has been fully yielded. This is only |
574 | /// used when `contents_first` is enabled. |
575 | deferred_dirs: Vec<DirEntry>, |
576 | /// The device of the root file path when the first call to `next` was |
577 | /// made. |
578 | /// |
579 | /// If the `same_file_system` option isn't enabled, then this is always |
580 | /// `None`. Conversely, if it is enabled, this is always `Some(...)` after |
581 | /// handling the root path. |
582 | root_device: Option<u64>, |
583 | } |
584 | |
585 | /// An ancestor is an item in the directory tree traversed by walkdir, and is |
586 | /// used to check for loops in the tree when traversing symlinks. |
587 | #[derive (Debug)] |
588 | struct Ancestor { |
589 | /// The path of this ancestor. |
590 | path: PathBuf, |
591 | /// An open file to this ancesor. This is only used on Windows where |
592 | /// opening a file handle appears to be quite expensive, so we choose to |
593 | /// cache it. This comes at the cost of not respecting the file descriptor |
594 | /// limit set by the user. |
595 | #[cfg (windows)] |
596 | handle: Handle, |
597 | } |
598 | |
599 | impl Ancestor { |
600 | /// Create a new ancestor from the given directory path. |
601 | #[cfg (windows)] |
602 | fn new(dent: &DirEntry) -> io::Result<Ancestor> { |
603 | let handle = Handle::from_path(dent.path())?; |
604 | Ok(Ancestor { path: dent.path().to_path_buf(), handle }) |
605 | } |
606 | |
607 | /// Create a new ancestor from the given directory path. |
608 | #[cfg (not(windows))] |
609 | fn new(dent: &DirEntry) -> io::Result<Ancestor> { |
610 | Ok(Ancestor { path: dent.path().to_path_buf() }) |
611 | } |
612 | |
613 | /// Returns true if and only if the given open file handle corresponds to |
614 | /// the same directory as this ancestor. |
615 | #[cfg (windows)] |
616 | fn is_same(&self, child: &Handle) -> io::Result<bool> { |
617 | Ok(child == &self.handle) |
618 | } |
619 | |
620 | /// Returns true if and only if the given open file handle corresponds to |
621 | /// the same directory as this ancestor. |
622 | #[cfg (not(windows))] |
623 | fn is_same(&self, child: &Handle) -> io::Result<bool> { |
624 | Ok(child == &Handle::from_path(&self.path)?) |
625 | } |
626 | } |
627 | |
628 | /// A sequence of unconsumed directory entries. |
629 | /// |
630 | /// This represents the opened or closed state of a directory handle. When |
631 | /// open, future entries are read by iterating over the raw `fs::ReadDir`. |
632 | /// When closed, all future entries are read into memory. Iteration then |
633 | /// proceeds over a [`Vec<fs::DirEntry>`]. |
634 | /// |
635 | /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html |
636 | /// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html |
637 | #[derive (Debug)] |
638 | enum DirList { |
639 | /// An opened handle. |
640 | /// |
641 | /// This includes the depth of the handle itself. |
642 | /// |
643 | /// If there was an error with the initial [`fs::read_dir`] call, then it |
644 | /// is stored here. (We use an [`Option<...>`] to make yielding the error |
645 | /// exactly once simpler.) |
646 | /// |
647 | /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html |
648 | /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html |
649 | Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> }, |
650 | /// A closed handle. |
651 | /// |
652 | /// All remaining directory entries are read into memory. |
653 | Closed(vec::IntoIter<Result<DirEntry>>), |
654 | } |
655 | |
656 | impl Iterator for IntoIter { |
657 | type Item = Result<DirEntry>; |
658 | /// Advances the iterator and returns the next value. |
659 | /// |
660 | /// # Errors |
661 | /// |
662 | /// If the iterator fails to retrieve the next value, this method returns |
663 | /// an error value. The error will be wrapped in an Option::Some. |
664 | fn next(&mut self) -> Option<Result<DirEntry>> { |
665 | if let Some(start) = self.start.take() { |
666 | if self.opts.same_file_system { |
667 | let result = util::device_num(&start) |
668 | .map_err(|e| Error::from_path(0, start.clone(), e)); |
669 | self.root_device = Some(itry!(result)); |
670 | } |
671 | let dent = itry!(DirEntry::from_path(0, start, false)); |
672 | if let Some(result) = self.handle_entry(dent) { |
673 | return Some(result); |
674 | } |
675 | } |
676 | while !self.stack_list.is_empty() { |
677 | self.depth = self.stack_list.len(); |
678 | if let Some(dentry) = self.get_deferred_dir() { |
679 | return Some(Ok(dentry)); |
680 | } |
681 | if self.depth > self.opts.max_depth { |
682 | // If we've exceeded the max depth, pop the current dir |
683 | // so that we don't descend. |
684 | self.pop(); |
685 | continue; |
686 | } |
687 | // Unwrap is safe here because we've verified above that |
688 | // `self.stack_list` is not empty |
689 | let next = self |
690 | .stack_list |
691 | .last_mut() |
692 | .expect("BUG: stack should be non-empty" ) |
693 | .next(); |
694 | match next { |
695 | None => self.pop(), |
696 | Some(Err(err)) => return Some(Err(err)), |
697 | Some(Ok(dent)) => { |
698 | if let Some(result) = self.handle_entry(dent) { |
699 | return Some(result); |
700 | } |
701 | } |
702 | } |
703 | } |
704 | if self.opts.contents_first { |
705 | self.depth = self.stack_list.len(); |
706 | if let Some(dentry) = self.get_deferred_dir() { |
707 | return Some(Ok(dentry)); |
708 | } |
709 | } |
710 | None |
711 | } |
712 | } |
713 | |
714 | impl IntoIter { |
715 | /// Skips the current directory. |
716 | /// |
717 | /// This causes the iterator to stop traversing the contents of the least |
718 | /// recently yielded directory. This means any remaining entries in that |
719 | /// directory will be skipped (including sub-directories). |
720 | /// |
721 | /// Note that the ergonomics of this method are questionable since it |
722 | /// borrows the iterator mutably. Namely, you must write out the looping |
723 | /// condition manually. For example, to skip hidden entries efficiently on |
724 | /// unix systems: |
725 | /// |
726 | /// ```no_run |
727 | /// use walkdir::{DirEntry, WalkDir}; |
728 | /// |
729 | /// fn is_hidden(entry: &DirEntry) -> bool { |
730 | /// entry.file_name() |
731 | /// .to_str() |
732 | /// .map(|s| s.starts_with("." )) |
733 | /// .unwrap_or(false) |
734 | /// } |
735 | /// |
736 | /// let mut it = WalkDir::new("foo" ).into_iter(); |
737 | /// loop { |
738 | /// let entry = match it.next() { |
739 | /// None => break, |
740 | /// Some(Err(err)) => panic!("ERROR: {}" , err), |
741 | /// Some(Ok(entry)) => entry, |
742 | /// }; |
743 | /// if is_hidden(&entry) { |
744 | /// if entry.file_type().is_dir() { |
745 | /// it.skip_current_dir(); |
746 | /// } |
747 | /// continue; |
748 | /// } |
749 | /// println!("{}" , entry.path().display()); |
750 | /// } |
751 | /// ``` |
752 | /// |
753 | /// You may find it more convenient to use the [`filter_entry`] iterator |
754 | /// adapter. (See its documentation for the same example functionality as |
755 | /// above.) |
756 | /// |
757 | /// [`filter_entry`]: #method.filter_entry |
758 | pub fn skip_current_dir(&mut self) { |
759 | if !self.stack_list.is_empty() { |
760 | self.pop(); |
761 | } |
762 | } |
763 | |
764 | /// Yields only entries which satisfy the given predicate and skips |
765 | /// descending into directories that do not satisfy the given predicate. |
766 | /// |
767 | /// The predicate is applied to all entries. If the predicate is |
768 | /// true, iteration carries on as normal. If the predicate is false, the |
769 | /// entry is ignored and if it is a directory, it is not descended into. |
770 | /// |
771 | /// This is often more convenient to use than [`skip_current_dir`]. For |
772 | /// example, to skip hidden files and directories efficiently on unix |
773 | /// systems: |
774 | /// |
775 | /// ```no_run |
776 | /// use walkdir::{DirEntry, WalkDir}; |
777 | /// # use walkdir::Error; |
778 | /// |
779 | /// fn is_hidden(entry: &DirEntry) -> bool { |
780 | /// entry.file_name() |
781 | /// .to_str() |
782 | /// .map(|s| s.starts_with("." )) |
783 | /// .unwrap_or(false) |
784 | /// } |
785 | /// |
786 | /// # fn try_main() -> Result<(), Error> { |
787 | /// for entry in WalkDir::new("foo" ) |
788 | /// .into_iter() |
789 | /// .filter_entry(|e| !is_hidden(e)) { |
790 | /// println!("{}" , entry?.path().display()); |
791 | /// } |
792 | /// # Ok(()) |
793 | /// # } |
794 | /// ``` |
795 | /// |
796 | /// Note that the iterator will still yield errors for reading entries that |
797 | /// may not satisfy the predicate. |
798 | /// |
799 | /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not |
800 | /// passed to this predicate. |
801 | /// |
802 | /// Note that if the iterator has `contents_first` enabled, then this |
803 | /// method is no different than calling the standard `Iterator::filter` |
804 | /// method (because directory entries are yielded after they've been |
805 | /// descended into). |
806 | /// |
807 | /// [`skip_current_dir`]: #method.skip_current_dir |
808 | /// [`min_depth`]: struct.WalkDir.html#method.min_depth |
809 | /// [`max_depth`]: struct.WalkDir.html#method.max_depth |
810 | pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> |
811 | where |
812 | P: FnMut(&DirEntry) -> bool, |
813 | { |
814 | FilterEntry { it: self, predicate } |
815 | } |
816 | |
817 | fn handle_entry( |
818 | &mut self, |
819 | mut dent: DirEntry, |
820 | ) -> Option<Result<DirEntry>> { |
821 | if self.opts.follow_links && dent.file_type().is_symlink() { |
822 | dent = itry!(self.follow(dent)); |
823 | } |
824 | let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir(); |
825 | if is_normal_dir { |
826 | if self.opts.same_file_system && dent.depth() > 0 { |
827 | if itry!(self.is_same_file_system(&dent)) { |
828 | itry!(self.push(&dent)); |
829 | } |
830 | } else { |
831 | itry!(self.push(&dent)); |
832 | } |
833 | } else if dent.depth() == 0 && dent.file_type().is_symlink() { |
834 | // As a special case, if we are processing a root entry, then we |
835 | // always follow it even if it's a symlink and follow_links is |
836 | // false. We are careful to not let this change the semantics of |
837 | // the DirEntry however. Namely, the DirEntry should still respect |
838 | // the follow_links setting. When it's disabled, it should report |
839 | // itself as a symlink. When it's enabled, it should always report |
840 | // itself as the target. |
841 | let md = itry!(fs::metadata(dent.path()).map_err(|err| { |
842 | Error::from_path(dent.depth(), dent.path().to_path_buf(), err) |
843 | })); |
844 | if md.file_type().is_dir() { |
845 | itry!(self.push(&dent)); |
846 | } |
847 | } |
848 | if is_normal_dir && self.opts.contents_first { |
849 | self.deferred_dirs.push(dent); |
850 | None |
851 | } else if self.skippable() { |
852 | None |
853 | } else { |
854 | Some(Ok(dent)) |
855 | } |
856 | } |
857 | |
858 | fn get_deferred_dir(&mut self) -> Option<DirEntry> { |
859 | if self.opts.contents_first { |
860 | if self.depth < self.deferred_dirs.len() { |
861 | // Unwrap is safe here because we've guaranteed that |
862 | // `self.deferred_dirs.len()` can never be less than 1 |
863 | let deferred: DirEntry = self |
864 | .deferred_dirs |
865 | .pop() |
866 | .expect("BUG: deferred_dirs should be non-empty" ); |
867 | if !self.skippable() { |
868 | return Some(deferred); |
869 | } |
870 | } |
871 | } |
872 | None |
873 | } |
874 | |
875 | fn push(&mut self, dent: &DirEntry) -> Result<()> { |
876 | // Make room for another open file descriptor if we've hit the max. |
877 | let free = |
878 | self.stack_list.len().checked_sub(self.oldest_opened).unwrap(); |
879 | if free == self.opts.max_open { |
880 | self.stack_list[self.oldest_opened].close(); |
881 | } |
882 | // Open a handle to reading the directory's entries. |
883 | let rd = fs::read_dir(dent.path()).map_err(|err| { |
884 | Some(Error::from_path(self.depth, dent.path().to_path_buf(), err)) |
885 | }); |
886 | let mut list = DirList::Opened { depth: self.depth, it: rd }; |
887 | if let Some(ref mut cmp) = self.opts.sorter { |
888 | let mut entries: Vec<_> = list.collect(); |
889 | entries.sort_by(|a, b| match (a, b) { |
890 | (&Ok(ref a), &Ok(ref b)) => cmp(a, b), |
891 | (&Err(_), &Err(_)) => Ordering::Equal, |
892 | (&Ok(_), &Err(_)) => Ordering::Greater, |
893 | (&Err(_), &Ok(_)) => Ordering::Less, |
894 | }); |
895 | list = DirList::Closed(entries.into_iter()); |
896 | } |
897 | if self.opts.follow_links { |
898 | let ancestor = Ancestor::new(&dent) |
899 | .map_err(|err| Error::from_io(self.depth, err))?; |
900 | self.stack_path.push(ancestor); |
901 | } |
902 | // We push this after stack_path since creating the Ancestor can fail. |
903 | // If it fails, then we return the error and won't descend. |
904 | self.stack_list.push(list); |
905 | // If we had to close out a previous directory stream, then we need to |
906 | // increment our index the oldest still-open stream. We do this only |
907 | // after adding to our stack, in order to ensure that the oldest_opened |
908 | // index remains valid. The worst that can happen is that an already |
909 | // closed stream will be closed again, which is a no-op. |
910 | // |
911 | // We could move the close of the stream above into this if-body, but |
912 | // then we would have more than the maximum number of file descriptors |
913 | // open at a particular point in time. |
914 | if free == self.opts.max_open { |
915 | // Unwrap is safe here because self.oldest_opened is guaranteed to |
916 | // never be greater than `self.stack_list.len()`, which implies |
917 | // that the subtraction won't underflow and that adding 1 will |
918 | // never overflow. |
919 | self.oldest_opened = self.oldest_opened.checked_add(1).unwrap(); |
920 | } |
921 | Ok(()) |
922 | } |
923 | |
924 | fn pop(&mut self) { |
925 | self.stack_list.pop().expect("BUG: cannot pop from empty stack" ); |
926 | if self.opts.follow_links { |
927 | self.stack_path.pop().expect("BUG: list/path stacks out of sync" ); |
928 | } |
929 | // If everything in the stack is already closed, then there is |
930 | // room for at least one more open descriptor and it will |
931 | // always be at the top of the stack. |
932 | self.oldest_opened = min(self.oldest_opened, self.stack_list.len()); |
933 | } |
934 | |
935 | fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> { |
936 | dent = |
937 | DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?; |
938 | // The only way a symlink can cause a loop is if it points |
939 | // to a directory. Otherwise, it always points to a leaf |
940 | // and we can omit any loop checks. |
941 | if dent.is_dir() { |
942 | self.check_loop(dent.path())?; |
943 | } |
944 | Ok(dent) |
945 | } |
946 | |
947 | fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> { |
948 | let hchild = Handle::from_path(&child) |
949 | .map_err(|err| Error::from_io(self.depth, err))?; |
950 | for ancestor in self.stack_path.iter().rev() { |
951 | let is_same = ancestor |
952 | .is_same(&hchild) |
953 | .map_err(|err| Error::from_io(self.depth, err))?; |
954 | if is_same { |
955 | return Err(Error::from_loop( |
956 | self.depth, |
957 | &ancestor.path, |
958 | child.as_ref(), |
959 | )); |
960 | } |
961 | } |
962 | Ok(()) |
963 | } |
964 | |
965 | fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> { |
966 | let dent_device = util::device_num(dent.path()) |
967 | .map_err(|err| Error::from_entry(dent, err))?; |
968 | Ok(self |
969 | .root_device |
970 | .map(|d| d == dent_device) |
971 | .expect("BUG: called is_same_file_system without root device" )) |
972 | } |
973 | |
974 | fn skippable(&self) -> bool { |
975 | self.depth < self.opts.min_depth || self.depth > self.opts.max_depth |
976 | } |
977 | } |
978 | |
979 | impl DirList { |
980 | fn close(&mut self) { |
981 | if let DirList::Opened { .. } = *self { |
982 | *self = DirList::Closed(self.collect::<Vec<_>>().into_iter()); |
983 | } |
984 | } |
985 | } |
986 | |
987 | impl Iterator for DirList { |
988 | type Item = Result<DirEntry>; |
989 | |
990 | #[inline (always)] |
991 | fn next(&mut self) -> Option<Result<DirEntry>> { |
992 | match *self { |
993 | DirList::Closed(ref mut it: &mut IntoIter>) => it.next(), |
994 | DirList::Opened { depth: usize, ref mut it: &mut Result> } => match *it { |
995 | Err(ref mut err: &mut Option) => err.take().map(Err), |
996 | Ok(ref mut rd: &mut ReadDir) => rd.next().map(|r: Result| match r { |
997 | Ok(r: DirEntry) => DirEntry::from_entry(depth:depth + 1, &r), |
998 | Err(err: Error) => Err(Error::from_io(depth:depth + 1, err)), |
999 | }), |
1000 | }, |
1001 | } |
1002 | } |
1003 | } |
1004 | |
1005 | /// A recursive directory iterator that skips entries. |
1006 | /// |
1007 | /// Values of this type are created by calling [`.filter_entry()`] on an |
1008 | /// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`. |
1009 | /// |
1010 | /// Directories that fail the predicate `P` are skipped. Namely, they are |
1011 | /// never yielded and never descended into. |
1012 | /// |
1013 | /// Entries that are skipped with the [`min_depth`] and [`max_depth`] options |
1014 | /// are not passed through this filter. |
1015 | /// |
1016 | /// If opening a handle to a directory resulted in an error, then it is yielded |
1017 | /// and no corresponding call to the predicate is made. |
1018 | /// |
1019 | /// Type parameter `I` refers to the underlying iterator and `P` refers to the |
1020 | /// predicate, which is usually `FnMut(&DirEntry) -> bool`. |
1021 | /// |
1022 | /// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry |
1023 | /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v |
1024 | /// [`min_depth`]: struct.WalkDir.html#method.min_depth |
1025 | /// [`max_depth`]: struct.WalkDir.html#method.max_depth |
1026 | #[derive (Debug)] |
1027 | pub struct FilterEntry<I, P> { |
1028 | it: I, |
1029 | predicate: P, |
1030 | } |
1031 | |
1032 | impl<P> Iterator for FilterEntry<IntoIter, P> |
1033 | where |
1034 | P: FnMut(&DirEntry) -> bool, |
1035 | { |
1036 | type Item = Result<DirEntry>; |
1037 | |
1038 | /// Advances the iterator and returns the next value. |
1039 | /// |
1040 | /// # Errors |
1041 | /// |
1042 | /// If the iterator fails to retrieve the next value, this method returns |
1043 | /// an error value. The error will be wrapped in an `Option::Some`. |
1044 | fn next(&mut self) -> Option<Result<DirEntry>> { |
1045 | loop { |
1046 | let dent = match self.it.next() { |
1047 | None => return None, |
1048 | Some(result) => itry!(result), |
1049 | }; |
1050 | if !(self.predicate)(&dent) { |
1051 | if dent.is_dir() { |
1052 | self.it.skip_current_dir(); |
1053 | } |
1054 | continue; |
1055 | } |
1056 | return Some(Ok(dent)); |
1057 | } |
1058 | } |
1059 | } |
1060 | |
1061 | impl<P> FilterEntry<IntoIter, P> |
1062 | where |
1063 | P: FnMut(&DirEntry) -> bool, |
1064 | { |
1065 | /// Yields only entries which satisfy the given predicate and skips |
1066 | /// descending into directories that do not satisfy the given predicate. |
1067 | /// |
1068 | /// The predicate is applied to all entries. If the predicate is |
1069 | /// true, iteration carries on as normal. If the predicate is false, the |
1070 | /// entry is ignored and if it is a directory, it is not descended into. |
1071 | /// |
1072 | /// This is often more convenient to use than [`skip_current_dir`]. For |
1073 | /// example, to skip hidden files and directories efficiently on unix |
1074 | /// systems: |
1075 | /// |
1076 | /// ```no_run |
1077 | /// use walkdir::{DirEntry, WalkDir}; |
1078 | /// # use walkdir::Error; |
1079 | /// |
1080 | /// fn is_hidden(entry: &DirEntry) -> bool { |
1081 | /// entry.file_name() |
1082 | /// .to_str() |
1083 | /// .map(|s| s.starts_with("." )) |
1084 | /// .unwrap_or(false) |
1085 | /// } |
1086 | /// |
1087 | /// # fn try_main() -> Result<(), Error> { |
1088 | /// for entry in WalkDir::new("foo" ) |
1089 | /// .into_iter() |
1090 | /// .filter_entry(|e| !is_hidden(e)) { |
1091 | /// println!("{}" , entry?.path().display()); |
1092 | /// } |
1093 | /// # Ok(()) |
1094 | /// # } |
1095 | /// ``` |
1096 | /// |
1097 | /// Note that the iterator will still yield errors for reading entries that |
1098 | /// may not satisfy the predicate. |
1099 | /// |
1100 | /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not |
1101 | /// passed to this predicate. |
1102 | /// |
1103 | /// Note that if the iterator has `contents_first` enabled, then this |
1104 | /// method is no different than calling the standard `Iterator::filter` |
1105 | /// method (because directory entries are yielded after they've been |
1106 | /// descended into). |
1107 | /// |
1108 | /// [`skip_current_dir`]: #method.skip_current_dir |
1109 | /// [`min_depth`]: struct.WalkDir.html#method.min_depth |
1110 | /// [`max_depth`]: struct.WalkDir.html#method.max_depth |
1111 | pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> { |
1112 | FilterEntry { it: self, predicate } |
1113 | } |
1114 | |
1115 | /// Skips the current directory. |
1116 | /// |
1117 | /// This causes the iterator to stop traversing the contents of the least |
1118 | /// recently yielded directory. This means any remaining entries in that |
1119 | /// directory will be skipped (including sub-directories). |
1120 | /// |
1121 | /// Note that the ergonomics of this method are questionable since it |
1122 | /// borrows the iterator mutably. Namely, you must write out the looping |
1123 | /// condition manually. For example, to skip hidden entries efficiently on |
1124 | /// unix systems: |
1125 | /// |
1126 | /// ```no_run |
1127 | /// use walkdir::{DirEntry, WalkDir}; |
1128 | /// |
1129 | /// fn is_hidden(entry: &DirEntry) -> bool { |
1130 | /// entry.file_name() |
1131 | /// .to_str() |
1132 | /// .map(|s| s.starts_with("." )) |
1133 | /// .unwrap_or(false) |
1134 | /// } |
1135 | /// |
1136 | /// let mut it = WalkDir::new("foo" ).into_iter(); |
1137 | /// loop { |
1138 | /// let entry = match it.next() { |
1139 | /// None => break, |
1140 | /// Some(Err(err)) => panic!("ERROR: {}" , err), |
1141 | /// Some(Ok(entry)) => entry, |
1142 | /// }; |
1143 | /// if is_hidden(&entry) { |
1144 | /// if entry.file_type().is_dir() { |
1145 | /// it.skip_current_dir(); |
1146 | /// } |
1147 | /// continue; |
1148 | /// } |
1149 | /// println!("{}" , entry.path().display()); |
1150 | /// } |
1151 | /// ``` |
1152 | /// |
1153 | /// You may find it more convenient to use the [`filter_entry`] iterator |
1154 | /// adapter. (See its documentation for the same example functionality as |
1155 | /// above.) |
1156 | /// |
1157 | /// [`filter_entry`]: #method.filter_entry |
1158 | pub fn skip_current_dir(&mut self) { |
1159 | self.it.skip_current_dir(); |
1160 | } |
1161 | } |
1162 | |