| 1 | /*! |
| 2 | The ignore crate provides a fast recursive directory iterator that respects |
| 3 | various filters such as globs, file types and `.gitignore` files. The precise |
| 4 | matching rules and precedence are explained in the documentation for |
| 5 | `WalkBuilder`. |
| 6 | |
| 7 | Secondarily, this crate exposes gitignore and file type matchers for use cases |
| 8 | that demand more fine-grained control. |
| 9 | |
| 10 | # Example |
| 11 | |
| 12 | This example shows the most basic usage of this crate. This code will |
| 13 | recursively traverse the current directory while automatically filtering out |
| 14 | files and directories according to ignore globs found in files like |
| 15 | `.ignore` and `.gitignore`: |
| 16 | |
| 17 | |
| 18 | ```rust,no_run |
| 19 | use ignore::Walk; |
| 20 | |
| 21 | for result in Walk::new("./" ) { |
| 22 | // Each item yielded by the iterator is either a directory entry or an |
| 23 | // error, so either print the path or the error. |
| 24 | match result { |
| 25 | Ok(entry) => println!("{}" , entry.path().display()), |
| 26 | Err(err) => println!("ERROR: {}" , err), |
| 27 | } |
| 28 | } |
| 29 | ``` |
| 30 | |
| 31 | # Example: advanced |
| 32 | |
| 33 | By default, the recursive directory iterator will ignore hidden files and |
| 34 | directories. This can be disabled by building the iterator with `WalkBuilder`: |
| 35 | |
| 36 | ```rust,no_run |
| 37 | use ignore::WalkBuilder; |
| 38 | |
| 39 | for result in WalkBuilder::new("./" ).hidden(false).build() { |
| 40 | println!("{:?}" , result); |
| 41 | } |
| 42 | ``` |
| 43 | |
| 44 | See the documentation for `WalkBuilder` for many other options. |
| 45 | */ |
| 46 | |
| 47 | #![deny (missing_docs)] |
| 48 | |
| 49 | use std::path::{Path, PathBuf}; |
| 50 | |
| 51 | pub use crate::walk::{ |
| 52 | DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder, |
| 53 | WalkParallel, WalkState, |
| 54 | }; |
| 55 | |
| 56 | mod default_types; |
| 57 | mod dir; |
| 58 | pub mod gitignore; |
| 59 | pub mod overrides; |
| 60 | mod pathutil; |
| 61 | pub mod types; |
| 62 | mod walk; |
| 63 | |
/// The error type used throughout this crate.
///
/// Its variants cover failures that arise while reading and parsing ignore
/// files, compiling globs, selecting file types and recursively walking
/// directories. Several variants exist only to attach context (a line number,
/// a file path or a directory depth) to another, boxed, `Error`.
#[derive(Debug)]
pub enum Error {
    /// A group of "soft" errors, reported when adding an ignore file only
    /// partially succeeded.
    Partial(Vec<Self>),
    /// Wraps another error together with the line number it relates to.
    WithLineNumber {
        /// The line number.
        line: u64,
        /// The error being annotated.
        err: Box<Self>,
    },
    /// Wraps another error together with the file path it relates to.
    WithPath {
        /// The file path.
        path: PathBuf,
        /// The error being annotated.
        err: Box<Self>,
    },
    /// Wraps another error together with the directory depth at which it
    /// occurred during a recursive directory walk.
    WithDepth {
        /// The directory depth.
        depth: usize,
        /// The error being annotated.
        err: Box<Self>,
    },
    /// A file system loop was detected while following symbolic links.
    Loop {
        /// The ancestor path that takes part in the loop.
        ancestor: PathBuf,
        /// The child path that takes part in the loop.
        child: PathBuf,
    },
    /// An I/O failure, for example while reading an ignore file.
    Io(std::io::Error),
    /// A glob could not be parsed.
    Glob {
        /// The glob that triggered the error, when it is available. It is
        /// always the glob as supplied by the end user, e.g. exactly as it
        /// is written in a `.gitignore` file.
        ///
        /// (Because of `gitignore` semantics, the glob that actually gets
        /// compiled may differ from this one.)
        glob: Option<String>,
        /// The glob error, rendered as a string.
        err: String,
    },
    /// A file type was selected that has no definition.
    UnrecognizedFileType(String),
    /// A user supplied file type definition could not be parsed.
    InvalidDefinition,
}
| 119 | |
| 120 | impl Clone for Error { |
| 121 | fn clone(&self) -> Error { |
| 122 | match *self { |
| 123 | Error::Partial(ref errs) => Error::Partial(errs.clone()), |
| 124 | Error::WithLineNumber { line, ref err } => { |
| 125 | Error::WithLineNumber { line, err: err.clone() } |
| 126 | } |
| 127 | Error::WithPath { ref path, ref err } => { |
| 128 | Error::WithPath { path: path.clone(), err: err.clone() } |
| 129 | } |
| 130 | Error::WithDepth { depth, ref err } => { |
| 131 | Error::WithDepth { depth, err: err.clone() } |
| 132 | } |
| 133 | Error::Loop { ref ancestor, ref child } => Error::Loop { |
| 134 | ancestor: ancestor.clone(), |
| 135 | child: child.clone(), |
| 136 | }, |
| 137 | Error::Io(ref err) => match err.raw_os_error() { |
| 138 | Some(e) => Error::Io(std::io::Error::from_raw_os_error(e)), |
| 139 | None => { |
| 140 | Error::Io(std::io::Error::new(err.kind(), err.to_string())) |
| 141 | } |
| 142 | }, |
| 143 | Error::Glob { ref glob, ref err } => { |
| 144 | Error::Glob { glob: glob.clone(), err: err.clone() } |
| 145 | } |
| 146 | Error::UnrecognizedFileType(ref err) => { |
| 147 | Error::UnrecognizedFileType(err.clone()) |
| 148 | } |
| 149 | Error::InvalidDefinition => Error::InvalidDefinition, |
| 150 | } |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | impl Error { |
| 155 | /// Returns true if this is a partial error. |
| 156 | /// |
| 157 | /// A partial error occurs when only some operations failed while others |
| 158 | /// may have succeeded. For example, an ignore file may contain an invalid |
| 159 | /// glob among otherwise valid globs. |
| 160 | pub fn is_partial(&self) -> bool { |
| 161 | match *self { |
| 162 | Error::Partial(_) => true, |
| 163 | Error::WithLineNumber { ref err, .. } => err.is_partial(), |
| 164 | Error::WithPath { ref err, .. } => err.is_partial(), |
| 165 | Error::WithDepth { ref err, .. } => err.is_partial(), |
| 166 | _ => false, |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | /// Returns true if this error is exclusively an I/O error. |
| 171 | pub fn is_io(&self) -> bool { |
| 172 | match *self { |
| 173 | Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(), |
| 174 | Error::WithLineNumber { ref err, .. } => err.is_io(), |
| 175 | Error::WithPath { ref err, .. } => err.is_io(), |
| 176 | Error::WithDepth { ref err, .. } => err.is_io(), |
| 177 | Error::Loop { .. } => false, |
| 178 | Error::Io(_) => true, |
| 179 | Error::Glob { .. } => false, |
| 180 | Error::UnrecognizedFileType(_) => false, |
| 181 | Error::InvalidDefinition => false, |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | /// Inspect the original [`std::io::Error`] if there is one. |
| 186 | /// |
| 187 | /// [`None`] is returned if the [`Error`] doesn't correspond to an |
| 188 | /// [`std::io::Error`]. This might happen, for example, when the error was |
| 189 | /// produced because a cycle was found in the directory tree while |
| 190 | /// following symbolic links. |
| 191 | /// |
| 192 | /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To |
| 193 | /// obtain an owned value, the [`into_io_error`] can be used instead. |
| 194 | /// |
| 195 | /// > This is the original [`std::io::Error`] and is _not_ the same as |
| 196 | /// > [`impl From<Error> for std::io::Error`][impl] which contains |
| 197 | /// > additional context about the error. |
| 198 | /// |
| 199 | /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None |
| 200 | /// [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html |
| 201 | /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html |
| 202 | /// [`Error`]: struct.Error.html |
| 203 | /// [`into_io_error`]: struct.Error.html#method.into_io_error |
| 204 | /// [impl]: struct.Error.html#impl-From%3CError%3E |
| 205 | pub fn io_error(&self) -> Option<&std::io::Error> { |
| 206 | match *self { |
| 207 | Error::Partial(ref errs) => { |
| 208 | if errs.len() == 1 { |
| 209 | errs[0].io_error() |
| 210 | } else { |
| 211 | None |
| 212 | } |
| 213 | } |
| 214 | Error::WithLineNumber { ref err, .. } => err.io_error(), |
| 215 | Error::WithPath { ref err, .. } => err.io_error(), |
| 216 | Error::WithDepth { ref err, .. } => err.io_error(), |
| 217 | Error::Loop { .. } => None, |
| 218 | Error::Io(ref err) => Some(err), |
| 219 | Error::Glob { .. } => None, |
| 220 | Error::UnrecognizedFileType(_) => None, |
| 221 | Error::InvalidDefinition => None, |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | /// Similar to [`io_error`] except consumes self to convert to the original |
| 226 | /// [`std::io::Error`] if one exists. |
| 227 | /// |
| 228 | /// [`io_error`]: struct.Error.html#method.io_error |
| 229 | /// [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html |
| 230 | pub fn into_io_error(self) -> Option<std::io::Error> { |
| 231 | match self { |
| 232 | Error::Partial(mut errs) => { |
| 233 | if errs.len() == 1 { |
| 234 | errs.remove(0).into_io_error() |
| 235 | } else { |
| 236 | None |
| 237 | } |
| 238 | } |
| 239 | Error::WithLineNumber { err, .. } => err.into_io_error(), |
| 240 | Error::WithPath { err, .. } => err.into_io_error(), |
| 241 | Error::WithDepth { err, .. } => err.into_io_error(), |
| 242 | Error::Loop { .. } => None, |
| 243 | Error::Io(err) => Some(err), |
| 244 | Error::Glob { .. } => None, |
| 245 | Error::UnrecognizedFileType(_) => None, |
| 246 | Error::InvalidDefinition => None, |
| 247 | } |
| 248 | } |
| 249 | |
| 250 | /// Returns a depth associated with recursively walking a directory (if |
| 251 | /// this error was generated from a recursive directory iterator). |
| 252 | pub fn depth(&self) -> Option<usize> { |
| 253 | match *self { |
| 254 | Error::WithPath { ref err, .. } => err.depth(), |
| 255 | Error::WithDepth { depth, .. } => Some(depth), |
| 256 | _ => None, |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | /// Turn an error into a tagged error with the given file path. |
| 261 | fn with_path<P: AsRef<Path>>(self, path: P) -> Error { |
| 262 | Error::WithPath { |
| 263 | path: path.as_ref().to_path_buf(), |
| 264 | err: Box::new(self), |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | /// Turn an error into a tagged error with the given depth. |
| 269 | fn with_depth(self, depth: usize) -> Error { |
| 270 | Error::WithDepth { depth, err: Box::new(self) } |
| 271 | } |
| 272 | |
| 273 | /// Turn an error into a tagged error with the given file path and line |
| 274 | /// number. If path is empty, then it is omitted from the error. |
| 275 | fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error { |
| 276 | let errline = |
| 277 | Error::WithLineNumber { line: lineno, err: Box::new(self) }; |
| 278 | if path.as_ref().as_os_str().is_empty() { |
| 279 | return errline; |
| 280 | } |
| 281 | errline.with_path(path) |
| 282 | } |
| 283 | |
| 284 | /// Build an error from a walkdir error. |
| 285 | fn from_walkdir(err: walkdir::Error) -> Error { |
| 286 | let depth = err.depth(); |
| 287 | if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) { |
| 288 | return Error::WithDepth { |
| 289 | depth, |
| 290 | err: Box::new(Error::Loop { |
| 291 | ancestor: anc.to_path_buf(), |
| 292 | child: child.to_path_buf(), |
| 293 | }), |
| 294 | }; |
| 295 | } |
| 296 | let path = err.path().map(|p| p.to_path_buf()); |
| 297 | let mut ig_err = Error::Io(std::io::Error::from(err)); |
| 298 | if let Some(path) = path { |
| 299 | ig_err = Error::WithPath { path, err: Box::new(ig_err) }; |
| 300 | } |
| 301 | ig_err |
| 302 | } |
| 303 | } |
| 304 | |
| 305 | impl std::error::Error for Error { |
| 306 | #[allow (deprecated)] |
| 307 | fn description(&self) -> &str { |
| 308 | match *self { |
| 309 | Error::Partial(_) => "partial error" , |
| 310 | Error::WithLineNumber { ref err: &Box, .. } => err.description(), |
| 311 | Error::WithPath { ref err: &Box, .. } => err.description(), |
| 312 | Error::WithDepth { ref err: &Box, .. } => err.description(), |
| 313 | Error::Loop { .. } => "file system loop found" , |
| 314 | Error::Io(ref err: &Error) => err.description(), |
| 315 | Error::Glob { ref err: &String, .. } => err, |
| 316 | Error::UnrecognizedFileType(_) => "unrecognized file type" , |
| 317 | Error::InvalidDefinition => "invalid definition" , |
| 318 | } |
| 319 | } |
| 320 | } |
| 321 | |
| 322 | impl std::fmt::Display for Error { |
| 323 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 324 | match *self { |
| 325 | Error::Partial(ref errs) => { |
| 326 | let msgs: Vec<String> = |
| 327 | errs.iter().map(|err| err.to_string()).collect(); |
| 328 | write!(f, " {}" , msgs.join(" \n" )) |
| 329 | } |
| 330 | Error::WithLineNumber { line, ref err } => { |
| 331 | write!(f, "line {}: {}" , line, err) |
| 332 | } |
| 333 | Error::WithPath { ref path, ref err } => { |
| 334 | write!(f, " {}: {}" , path.display(), err) |
| 335 | } |
| 336 | Error::WithDepth { ref err, .. } => err.fmt(f), |
| 337 | Error::Loop { ref ancestor, ref child } => write!( |
| 338 | f, |
| 339 | "File system loop found: \ |
| 340 | {} points to an ancestor {}" , |
| 341 | child.display(), |
| 342 | ancestor.display() |
| 343 | ), |
| 344 | Error::Io(ref err) => err.fmt(f), |
| 345 | Error::Glob { glob: None, ref err } => write!(f, " {}" , err), |
| 346 | Error::Glob { glob: Some(ref glob), ref err } => { |
| 347 | write!(f, "error parsing glob ' {}': {}" , glob, err) |
| 348 | } |
| 349 | Error::UnrecognizedFileType(ref ty) => { |
| 350 | write!(f, "unrecognized file type: {}" , ty) |
| 351 | } |
| 352 | Error::InvalidDefinition => write!( |
| 353 | f, |
| 354 | "invalid definition (format is type:glob, e.g., \ |
| 355 | html:*.html)" |
| 356 | ), |
| 357 | } |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | impl From<std::io::Error> for Error { |
| 362 | fn from(err: std::io::Error) -> Error { |
| 363 | Error::Io(err) |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | #[derive (Debug, Default)] |
| 368 | struct PartialErrorBuilder(Vec<Error>); |
| 369 | |
| 370 | impl PartialErrorBuilder { |
| 371 | fn push(&mut self, err: Error) { |
| 372 | self.0.push(err); |
| 373 | } |
| 374 | |
| 375 | fn push_ignore_io(&mut self, err: Error) { |
| 376 | if !err.is_io() { |
| 377 | self.push(err); |
| 378 | } |
| 379 | } |
| 380 | |
| 381 | fn maybe_push(&mut self, err: Option<Error>) { |
| 382 | if let Some(err) = err { |
| 383 | self.push(err); |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | fn maybe_push_ignore_io(&mut self, err: Option<Error>) { |
| 388 | if let Some(err) = err { |
| 389 | self.push_ignore_io(err); |
| 390 | } |
| 391 | } |
| 392 | |
| 393 | fn into_error_option(mut self) -> Option<Error> { |
| 394 | if self.0.is_empty() { |
| 395 | None |
| 396 | } else if self.0.len() == 1 { |
| 397 | Some(self.0.pop().unwrap()) |
| 398 | } else { |
| 399 | Some(Error::Partial(self.0)) |
| 400 | } |
| 401 | } |
| 402 | } |
| 403 | |
/// The outcome of matching a path against a set of globs.
///
/// The type parameter `T` typically carries extra information about the
/// match, for example which gitignore file and which glob pattern were
/// responsible for it.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// No glob matched the path.
    None,
    /// The highest precedent glob that matched says the path should be
    /// ignored.
    Ignore(T),
    /// The highest precedent glob that matched says the path should be
    /// whitelisted.
    Whitelist(T),
}

impl<T> Match<T> {
    /// Returns true if no glob matched.
    pub fn is_none(&self) -> bool {
        matches!(self, Match::None)
    }

    /// Returns true if the match says the path should be ignored.
    pub fn is_ignore(&self) -> bool {
        matches!(self, Match::Ignore(_))
    }

    /// Returns true if the match says the path should be whitelisted.
    pub fn is_whitelist(&self) -> bool {
        matches!(self, Match::Whitelist(_))
    }

    /// Swaps `Ignore` and `Whitelist`, keeping the carried value. A
    /// non-match stays a non-match.
    pub fn invert(self) -> Match<T> {
        match self {
            Match::Ignore(value) => Match::Whitelist(value),
            Match::Whitelist(value) => Match::Ignore(value),
            Match::None => Match::None,
        }
    }

    /// Borrows the value carried by this match, if there is one.
    pub fn inner(&self) -> Option<&T> {
        match self {
            Match::Ignore(value) | Match::Whitelist(value) => Some(value),
            Match::None => None,
        }
    }

    /// Transforms the carried value with `f`, keeping the kind of match.
    ///
    /// A non-match carries no value, so `f` is not called for it.
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
        match self {
            Match::Ignore(value) => Match::Ignore(f(value)),
            Match::Whitelist(value) => Match::Whitelist(f(value)),
            Match::None => Match::None,
        }
    }

    /// Returns `self` unless it is a non-match, in which case `other` is
    /// returned.
    pub fn or(self, other: Self) -> Self {
        match self {
            Match::None => other,
            matched => matched,
        }
    }
}
| 487 | |
#[cfg(test)]
mod tests {
    use std::{
        env, fs, io,
        path::{Path, PathBuf},
    };

    /// A convenient result type alias.
    pub(crate) type Result<T> =
        std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;

    /// Builds a boxed error from `format!`-style arguments.
    macro_rules! err {
        ($($tt:tt)*) => {
            Box::<dyn std::error::Error + Send + Sync>::from(format!($($tt)*))
        }
    }

    /// A simple wrapper for creating a temporary directory that is
    /// automatically deleted when it's dropped.
    ///
    /// We use this in lieu of tempfile because tempfile brings in too many
    /// dependencies.
    #[derive(Debug)]
    pub struct TempDir(PathBuf);

    impl Drop for TempDir {
        /// Removes the directory and everything in it, panicking if the
        /// removal fails.
        fn drop(&mut self) {
            fs::remove_dir_all(&self.0).unwrap();
        }
    }

    impl TempDir {
        /// Create a new empty temporary directory under the system's
        /// configured temporary directory.
        ///
        /// Candidates are `rust-ignore/<counter>` below the temporary
        /// directory. Each candidate is claimed with an exclusive
        /// `create_dir`, so a directory that already exists (including one
        /// created concurrently by another process) is never handed out;
        /// the next counter value is tried instead.
        ///
        /// # Errors
        ///
        /// Fails if a directory cannot be created for a reason other than
        /// already existing, or if no free candidate is found within the
        /// fixed number of tries.
        pub fn new() -> Result<TempDir> {
            use std::sync::atomic::{AtomicUsize, Ordering};

            const TRIES: usize = 100;
            static COUNTER: AtomicUsize = AtomicUsize::new(0);

            let base = env::temp_dir().join("rust-ignore");
            fs::create_dir_all(&base).map_err(|e| {
                err!("failed to create {}: {}", base.display(), e)
            })?;
            for _ in 0..TRIES {
                let count = COUNTER.fetch_add(1, Ordering::SeqCst);
                let path = base.join(count.to_string());
                // `create_dir` fails if the path exists, which makes the
                // existence check and the creation a single atomic step.
                match fs::create_dir(&path) {
                    Ok(()) => return Ok(TempDir(path)),
                    Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
                        continue;
                    }
                    Err(e) => {
                        return Err(err!(
                            "failed to create {}: {}",
                            path.display(),
                            e
                        ));
                    }
                }
            }
            Err(err!("failed to create temp dir after {} tries", TRIES))
        }

        /// Return the underlying path to this temporary directory.
        pub fn path(&self) -> &Path {
            &self.0
        }
    }
}
| 549 | |