1// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Support for matching file paths against Unix shell style patterns.
12//!
13//! The `glob` and `glob_with` functions allow querying the filesystem for all
14//! files that match a particular pattern (similar to the libc `glob` function).
15//! The methods on the `Pattern` type provide functionality for checking if
16//! individual paths match a particular pattern (similar to the libc `fnmatch`
17//! function).
18//!
19//! For consistency across platforms, and for Windows support, this module
20//! is implemented entirely in Rust rather than deferring to the libc
21//! `glob`/`fnmatch` functions.
22//!
23//! # Examples
24//!
25//! To print all jpg files in `/media/` and all of its subdirectories.
26//!
27//! ```rust,no_run
28//! use glob::glob;
29//!
30//! for entry in glob("/media/**/*.jpg").expect("Failed to read glob pattern") {
31//! match entry {
32//! Ok(path) => println!("{:?}", path.display()),
33//! Err(e) => println!("{:?}", e),
34//! }
35//! }
36//! ```
37//!
38//! To print all files containing the letter "a", case insensitive, in a `local`
39//! directory relative to the current working directory. This ignores errors
40//! instead of printing them.
41//!
42//! ```rust,no_run
43//! use glob::glob_with;
44//! use glob::MatchOptions;
45//!
46//! let options = MatchOptions {
47//! case_sensitive: false,
48//! require_literal_separator: false,
49//! require_literal_leading_dot: false,
50//! };
51//! for entry in glob_with("local/*a*", options).unwrap() {
52//! if let Ok(path) = entry {
53//! println!("{:?}", path.display())
54//! }
55//! }
56//! ```
57
58#![doc(
59 html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
60 html_favicon_url = "https://www.rust-lang.org/favicon.ico",
61 html_root_url = "https://docs.rs/glob/0.3.1"
62)]
63#![deny(missing_docs)]
64
65#[cfg(test)]
66#[macro_use]
67extern crate doc_comment;
68
69#[cfg(test)]
70doctest!("../README.md");
71
72use std::cmp;
73use std::error::Error;
74use std::fmt;
75use std::fs;
76use std::io;
77use std::path::{self, Component, Path, PathBuf};
78use std::str::FromStr;
79
80use CharSpecifier::{CharRange, SingleChar};
81use MatchResult::{EntirePatternDoesntMatch, Match, SubPatternDoesntMatch};
82use PatternToken::AnyExcept;
83use PatternToken::{AnyChar, AnyRecursiveSequence, AnySequence, AnyWithin, Char};
84
85/// An iterator that yields `Path`s from the filesystem that match a particular
86/// pattern.
87///
88/// Note that it yields `GlobResult` in order to report any `IoErrors` that may
89/// arise during iteration. If a directory matches but is unreadable,
90/// thereby preventing its contents from being checked for matches, a
91/// `GlobError` is returned to express this.
92///
93/// See the `glob` function for more details.
#[derive(Debug)]
pub struct Paths {
    /// Compiled patterns, one per separator-delimited component of the glob
    /// (after any root), in the order they appear.
    dir_patterns: Vec<Pattern>,
    /// `true` when the glob ended with a path separator; in that case only
    /// directories are yielded.
    require_dir: bool,
    /// Match options handed to every pattern comparison.
    options: MatchOptions,
    /// Stack of pending work, popped from the end. Each entry is either a path
    /// paired with the index of the pattern it still has to be matched against
    /// (`!0` meaning "already matched"), or an error to report to the caller.
    todo: Vec<Result<(PathBuf, usize), GlobError>>,
    /// Directory the walk starts from. It is taken on the first call to
    /// `next()` to seed `todo`, and is `None` from then on.
    scope: Option<PathBuf>,
}
102
103/// Return an iterator that produces all the `Path`s that match the given
104/// pattern using default match options, which may be absolute or relative to
105/// the current working directory.
106///
107/// This may return an error if the pattern is invalid.
108///
109/// This method uses the default match options and is equivalent to calling
110/// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you
111/// want to use non-default match options.
112///
113/// When iterating, each result is a `GlobResult` which expresses the
114/// possibility that there was an `IoError` when attempting to read the contents
115/// of the matched path. In other words, each item returned by the iterator
116/// will either be an `Ok(Path)` if the path matched, or an `Err(GlobError)` if
117/// the path (partially) matched _but_ its contents could not be read in order
118/// to determine if its contents matched.
119///
120/// See the `Paths` documentation for more information.
121///
122/// # Examples
123///
124/// Consider a directory `/media/pictures` containing only the files
125/// `kittens.jpg`, `puppies.jpg` and `hamsters.gif`:
126///
127/// ```rust,no_run
128/// use glob::glob;
129///
130/// for entry in glob("/media/pictures/*.jpg").unwrap() {
131/// match entry {
132/// Ok(path) => println!("{:?}", path.display()),
133///
134/// // if the path matched but was unreadable,
135/// // thereby preventing its contents from matching
136/// Err(e) => println!("{:?}", e),
137/// }
138/// }
139/// ```
140///
141/// The above code will print:
142///
143/// ```ignore
144/// /media/pictures/kittens.jpg
145/// /media/pictures/puppies.jpg
146/// ```
147///
148/// If you want to ignore unreadable paths, you can use something like
149/// `filter_map`:
150///
151/// ```rust
152/// use glob::glob;
153/// use std::result::Result;
154///
155/// for path in glob("/media/pictures/*.jpg").unwrap().filter_map(Result::ok) {
156/// println!("{}", path.display());
157/// }
158/// ```
159/// Paths are yielded in alphabetical order.
160pub fn glob(pattern: &str) -> Result<Paths, PatternError> {
161 glob_with(pattern, options:MatchOptions::new())
162}
163
164/// Return an iterator that produces all the `Path`s that match the given
165/// pattern using the specified match options, which may be absolute or relative
166/// to the current working directory.
167///
168/// This may return an error if the pattern is invalid.
169///
170/// This function accepts Unix shell style patterns as described by
171/// `Pattern::new(..)`. The options given are passed through unchanged to
172/// `Pattern::matches_with(..)` with the exception that
173/// `require_literal_separator` is always set to `true` regardless of the value
174/// passed to this function.
175///
176/// Paths are yielded in alphabetical order.
177pub fn glob_with(pattern: &str, options: MatchOptions) -> Result<Paths, PatternError> {
178 #[cfg(windows)]
179 fn check_windows_verbatim(p: &Path) -> bool {
180 match p.components().next() {
181 Some(Component::Prefix(ref p)) => p.kind().is_verbatim(),
182 _ => false,
183 }
184 }
185 #[cfg(not(windows))]
186 fn check_windows_verbatim(_: &Path) -> bool {
187 false
188 }
189
190 #[cfg(windows)]
191 fn to_scope(p: &Path) -> PathBuf {
192 // FIXME handle volume relative paths here
193 p.to_path_buf()
194 }
195 #[cfg(not(windows))]
196 fn to_scope(p: &Path) -> PathBuf {
197 p.to_path_buf()
198 }
199
200 // make sure that the pattern is valid first, else early return with error
201 if let Err(err) = Pattern::new(pattern) {
202 return Err(err);
203 }
204
205 let mut components = Path::new(pattern).components().peekable();
206 loop {
207 match components.peek() {
208 Some(&Component::Prefix(..)) | Some(&Component::RootDir) => {
209 components.next();
210 }
211 _ => break,
212 }
213 }
214 let rest = components.map(|s| s.as_os_str()).collect::<PathBuf>();
215 let normalized_pattern = Path::new(pattern).iter().collect::<PathBuf>();
216 let root_len = normalized_pattern.to_str().unwrap().len() - rest.to_str().unwrap().len();
217 let root = if root_len > 0 {
218 Some(Path::new(&pattern[..root_len]))
219 } else {
220 None
221 };
222
223 if root_len > 0 && check_windows_verbatim(root.unwrap()) {
224 // FIXME: How do we want to handle verbatim paths? I'm inclined to
225 // return nothing, since we can't very well find all UNC shares with a
226 // 1-letter server name.
227 return Ok(Paths {
228 dir_patterns: Vec::new(),
229 require_dir: false,
230 options,
231 todo: Vec::new(),
232 scope: None,
233 });
234 }
235
236 let scope = root.map_or_else(|| PathBuf::from("."), to_scope);
237
238 let mut dir_patterns = Vec::new();
239 let components =
240 pattern[cmp::min(root_len, pattern.len())..].split_terminator(path::is_separator);
241
242 for component in components {
243 dir_patterns.push(Pattern::new(component)?);
244 }
245
246 if root_len == pattern.len() {
247 dir_patterns.push(Pattern {
248 original: "".to_string(),
249 tokens: Vec::new(),
250 is_recursive: false,
251 });
252 }
253
254 let last_is_separator = pattern.chars().next_back().map(path::is_separator);
255 let require_dir = last_is_separator == Some(true);
256 let todo = Vec::new();
257
258 Ok(Paths {
259 dir_patterns,
260 require_dir,
261 options,
262 todo,
263 scope: Some(scope),
264 })
265}
266
267/// A glob iteration error.
268///
269/// This is typically returned when a particular path cannot be read
270/// to determine if its contents match the glob pattern. This is possible
271/// if the program lacks the appropriate permissions, for example.
#[derive(Debug)]
pub struct GlobError {
    /// The path that was being read when the error occurred.
    path: PathBuf,
    /// The underlying I/O error.
    error: io::Error,
}
277
278impl GlobError {
279 /// The Path that the error corresponds to.
280 pub fn path(&self) -> &Path {
281 &self.path
282 }
283
284 /// The error in question.
285 pub fn error(&self) -> &io::Error {
286 &self.error
287 }
288
289 /// Consumes self, returning the _raw_ underlying `io::Error`
290 pub fn into_error(self) -> io::Error {
291 self.error
292 }
293}
294
295impl Error for GlobError {
296 #[allow(deprecated)]
297 fn description(&self) -> &str {
298 self.error.description()
299 }
300
301 #[allow(unknown_lints, bare_trait_objects)]
302 fn cause(&self) -> Option<&Error> {
303 Some(&self.error)
304 }
305}
306
307impl fmt::Display for GlobError {
308 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
309 write!(
310 f,
311 "attempting to read `{}` resulted in an error: {}",
312 self.path.display(),
313 self.error
314 )
315 }
316}
317
// Reports whether `p` names a directory. Any failure to read the metadata
// (missing path, permission problem, ...) is treated as "not a directory".
fn is_dir(p: &Path) -> bool {
    fs::metadata(p).map(|m| m.is_dir()).unwrap_or(false)
}
321
/// An alias for a glob iteration result.
///
/// This represents either a matched path (`Ok`) or a glob iteration error
/// (`Err`), such as failing to read a particular directory's contents. It is
/// the item type yielded by the `Paths` iterator.
pub type GlobResult = Result<PathBuf, GlobError>;
327
328impl Iterator for Paths {
329 type Item = GlobResult;
330
331 fn next(&mut self) -> Option<GlobResult> {
332 // the todo buffer hasn't been initialized yet, so it's done at this
333 // point rather than in glob() so that the errors are unified that is,
334 // failing to fill the buffer is an iteration error construction of the
335 // iterator (i.e. glob()) only fails if it fails to compile the Pattern
336 if let Some(scope) = self.scope.take() {
337 if !self.dir_patterns.is_empty() {
338 // Shouldn't happen, but we're using -1 as a special index.
339 assert!(self.dir_patterns.len() < !0 as usize);
340
341 fill_todo(&mut self.todo, &self.dir_patterns, 0, &scope, self.options);
342 }
343 }
344
345 loop {
346 if self.dir_patterns.is_empty() || self.todo.is_empty() {
347 return None;
348 }
349
350 let (path, mut idx) = match self.todo.pop().unwrap() {
351 Ok(pair) => pair,
352 Err(e) => return Some(Err(e)),
353 };
354
355 // idx -1: was already checked by fill_todo, maybe path was '.' or
356 // '..' that we can't match here because of normalization.
357 if idx == !0 as usize {
358 if self.require_dir && !is_dir(&path) {
359 continue;
360 }
361 return Some(Ok(path));
362 }
363
364 if self.dir_patterns[idx].is_recursive {
365 let mut next = idx;
366
367 // collapse consecutive recursive patterns
368 while (next + 1) < self.dir_patterns.len()
369 && self.dir_patterns[next + 1].is_recursive
370 {
371 next += 1;
372 }
373
374 if is_dir(&path) {
375 // the path is a directory, so it's a match
376
377 // push this directory's contents
378 fill_todo(
379 &mut self.todo,
380 &self.dir_patterns,
381 next,
382 &path,
383 self.options,
384 );
385
386 if next == self.dir_patterns.len() - 1 {
387 // pattern ends in recursive pattern, so return this
388 // directory as a result
389 return Some(Ok(path));
390 } else {
391 // advanced to the next pattern for this path
392 idx = next + 1;
393 }
394 } else if next == self.dir_patterns.len() - 1 {
395 // not a directory and it's the last pattern, meaning no
396 // match
397 continue;
398 } else {
399 // advanced to the next pattern for this path
400 idx = next + 1;
401 }
402 }
403
404 // not recursive, so match normally
405 if self.dir_patterns[idx].matches_with(
406 {
407 match path.file_name().and_then(|s| s.to_str()) {
408 // FIXME (#9639): How do we handle non-utf8 filenames?
409 // Ignore them for now; ideally we'd still match them
410 // against a *
411 None => continue,
412 Some(x) => x,
413 }
414 },
415 self.options,
416 ) {
417 if idx == self.dir_patterns.len() - 1 {
418 // it is not possible for a pattern to match a directory
419 // *AND* its children so we don't need to check the
420 // children
421
422 if !self.require_dir || is_dir(&path) {
423 return Some(Ok(path));
424 }
425 } else {
426 fill_todo(
427 &mut self.todo,
428 &self.dir_patterns,
429 idx + 1,
430 &path,
431 self.options,
432 );
433 }
434 }
435 }
436 }
437}
438
/// A pattern parsing error.
///
/// Returned by `Pattern::new` (and therefore by `glob` / `glob_with`) when the
/// pattern text cannot be compiled.
#[derive(Debug)]
#[allow(missing_copy_implementations)]
pub struct PatternError {
    /// The approximate character index of where the error occurred.
    pub pos: usize,

    /// A message describing the error.
    pub msg: &'static str,
}
449
450impl Error for PatternError {
451 fn description(&self) -> &str {
452 self.msg
453 }
454}
455
456impl fmt::Display for PatternError {
457 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
458 write!(
459 f,
460 "Pattern syntax error near position {}: {}",
461 self.pos, self.msg
462 )
463 }
464}
465
466/// A compiled Unix shell style pattern.
467///
468/// - `?` matches any single character.
469///
470/// - `*` matches any (possibly empty) sequence of characters.
471///
472/// - `**` matches the current directory and arbitrary subdirectories. This
473/// sequence **must** form a single path component, so both `**a` and `b**`
474/// are invalid and will result in an error. A sequence of more than two
475/// consecutive `*` characters is also invalid.
476///
477/// - `[...]` matches any character inside the brackets. Character sequences
478/// can also specify ranges of characters, as ordered by Unicode, so e.g.
479/// `[0-9]` specifies any character between 0 and 9 inclusive. An unclosed
480/// bracket is invalid.
481///
482/// - `[!...]` is the negation of `[...]`, i.e. it matches any characters
483/// **not** in the brackets.
484///
485/// - The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets
486/// (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then it
487/// is interpreted as being part of, rather then ending, the character set, so
488/// `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively. The `-`
489/// character can be specified inside a character sequence pattern by placing
490/// it at the start or the end, e.g. `[abc-]`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub struct Pattern {
    /// The pattern text exactly as it was passed to `Pattern::new`.
    original: String,
    /// The compiled form of `original`, matched token by token.
    tokens: Vec<PatternToken>,
    /// `true` when the pattern contains a recursive `**` wildcard.
    is_recursive: bool,
}
497
498/// Show the original glob pattern.
499impl fmt::Display for Pattern {
500 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
501 self.original.fmt(f)
502 }
503}
504
505impl FromStr for Pattern {
506 type Err = PatternError;
507
508 fn from_str(s: &str) -> Result<Self, PatternError> {
509 Self::new(pattern:s)
510 }
511}
512
// One unit of a compiled `Pattern`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum PatternToken {
    // A literal character.
    Char(char),
    // `?`: any single character.
    AnyChar,
    // `*`: any (possibly empty) sequence of characters.
    AnySequence,
    // `**`: the recursive wildcard.
    AnyRecursiveSequence,
    // `[...]`: any character covered by one of the specifiers.
    AnyWithin(Vec<CharSpecifier>),
    // `[!...]`: any character covered by none of the specifiers.
    AnyExcept(Vec<CharSpecifier>),
}
522
// One entry of a bracketed character class.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum CharSpecifier {
    // Exactly this character.
    SingleChar(char),
    // Every character from the first to the second, both inclusive.
    CharRange(char, char),
}
528
// Outcome of matching the tail of a pattern against the tail of an input.
#[derive(Copy, Clone, PartialEq)]
enum MatchResult {
    // The pattern tail consumed the input tail completely.
    Match,
    // This attempt failed; an enclosing wildcard may retry after consuming
    // more input.
    SubPatternDoesntMatch,
    // The input ran out while tokens were still left; wildcards pass this
    // result straight up instead of retrying.
    EntirePatternDoesntMatch,
}
535
// Reported for a run of more than two consecutive `*`.
const ERROR_WILDCARDS: &str = "wildcards are either regular `*` or recursive `**`";
// Reported for a `**` that is not bounded by path separators (or the pattern
// edges) on both sides.
const ERROR_RECURSIVE_WILDCARDS: &str = "recursive wildcards must form a single path \
                                         component";
// Reported for a `[` that does not start a well-formed, closed character class.
const ERROR_INVALID_RANGE: &str = "invalid range pattern";
540
541impl Pattern {
542 /// This function compiles Unix shell style patterns.
543 ///
544 /// An invalid glob pattern will yield a `PatternError`.
545 pub fn new(pattern: &str) -> Result<Self, PatternError> {
546 let chars = pattern.chars().collect::<Vec<_>>();
547 let mut tokens = Vec::new();
548 let mut is_recursive = false;
549 let mut i = 0;
550
551 while i < chars.len() {
552 match chars[i] {
553 '?' => {
554 tokens.push(AnyChar);
555 i += 1;
556 }
557 '*' => {
558 let old = i;
559
560 while i < chars.len() && chars[i] == '*' {
561 i += 1;
562 }
563
564 let count = i - old;
565
566 if count > 2 {
567 return Err(PatternError {
568 pos: old + 2,
569 msg: ERROR_WILDCARDS,
570 });
571 } else if count == 2 {
572 // ** can only be an entire path component
573 // i.e. a/**/b is valid, but a**/b or a/**b is not
574 // invalid matches are treated literally
575 let is_valid = if i == 2 || path::is_separator(chars[i - count - 1]) {
576 // it ends in a '/'
577 if i < chars.len() && path::is_separator(chars[i]) {
578 i += 1;
579 true
580 // or the pattern ends here
581 // this enables the existing globbing mechanism
582 } else if i == chars.len() {
583 true
584 // `**` ends in non-separator
585 } else {
586 return Err(PatternError {
587 pos: i,
588 msg: ERROR_RECURSIVE_WILDCARDS,
589 });
590 }
591 // `**` begins with non-separator
592 } else {
593 return Err(PatternError {
594 pos: old - 1,
595 msg: ERROR_RECURSIVE_WILDCARDS,
596 });
597 };
598
599 if is_valid {
600 // collapse consecutive AnyRecursiveSequence to a
601 // single one
602
603 let tokens_len = tokens.len();
604
605 if !(tokens_len > 1 && tokens[tokens_len - 1] == AnyRecursiveSequence) {
606 is_recursive = true;
607 tokens.push(AnyRecursiveSequence);
608 }
609 }
610 } else {
611 tokens.push(AnySequence);
612 }
613 }
614 '[' => {
615 if i + 4 <= chars.len() && chars[i + 1] == '!' {
616 match chars[i + 3..].iter().position(|x| *x == ']') {
617 None => (),
618 Some(j) => {
619 let chars = &chars[i + 2..i + 3 + j];
620 let cs = parse_char_specifiers(chars);
621 tokens.push(AnyExcept(cs));
622 i += j + 4;
623 continue;
624 }
625 }
626 } else if i + 3 <= chars.len() && chars[i + 1] != '!' {
627 match chars[i + 2..].iter().position(|x| *x == ']') {
628 None => (),
629 Some(j) => {
630 let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]);
631 tokens.push(AnyWithin(cs));
632 i += j + 3;
633 continue;
634 }
635 }
636 }
637
638 // if we get here then this is not a valid range pattern
639 return Err(PatternError {
640 pos: i,
641 msg: ERROR_INVALID_RANGE,
642 });
643 }
644 c => {
645 tokens.push(Char(c));
646 i += 1;
647 }
648 }
649 }
650
651 Ok(Self {
652 tokens,
653 original: pattern.to_string(),
654 is_recursive,
655 })
656 }
657
658 /// Escape metacharacters within the given string by surrounding them in
659 /// brackets. The resulting string will, when compiled into a `Pattern`,
660 /// match the input string and nothing else.
661 pub fn escape(s: &str) -> String {
662 let mut escaped = String::new();
663 for c in s.chars() {
664 match c {
665 // note that ! does not need escaping because it is only special
666 // inside brackets
667 '?' | '*' | '[' | ']' => {
668 escaped.push('[');
669 escaped.push(c);
670 escaped.push(']');
671 }
672 c => {
673 escaped.push(c);
674 }
675 }
676 }
677 escaped
678 }
679
680 /// Return if the given `str` matches this `Pattern` using the default
681 /// match options (i.e. `MatchOptions::new()`).
682 ///
683 /// # Examples
684 ///
685 /// ```rust
686 /// use glob::Pattern;
687 ///
688 /// assert!(Pattern::new("c?t").unwrap().matches("cat"));
689 /// assert!(Pattern::new("k[!e]tteh").unwrap().matches("kitteh"));
690 /// assert!(Pattern::new("d*g").unwrap().matches("doog"));
691 /// ```
692 pub fn matches(&self, str: &str) -> bool {
693 self.matches_with(str, MatchOptions::new())
694 }
695
696 /// Return if the given `Path`, when converted to a `str`, matches this
697 /// `Pattern` using the default match options (i.e. `MatchOptions::new()`).
698 pub fn matches_path(&self, path: &Path) -> bool {
699 // FIXME (#9639): This needs to handle non-utf8 paths
700 path.to_str().map_or(false, |s| self.matches(s))
701 }
702
703 /// Return if the given `str` matches this `Pattern` using the specified
704 /// match options.
705 pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool {
706 self.matches_from(true, str.chars(), 0, options) == Match
707 }
708
709 /// Return if the given `Path`, when converted to a `str`, matches this
710 /// `Pattern` using the specified match options.
711 pub fn matches_path_with(&self, path: &Path, options: MatchOptions) -> bool {
712 // FIXME (#9639): This needs to handle non-utf8 paths
713 path.to_str()
714 .map_or(false, |s| self.matches_with(s, options))
715 }
716
717 /// Access the original glob pattern.
718 pub fn as_str(&self) -> &str {
719 &self.original
720 }
721
722 fn matches_from(
723 &self,
724 mut follows_separator: bool,
725 mut file: std::str::Chars,
726 i: usize,
727 options: MatchOptions,
728 ) -> MatchResult {
729 for (ti, token) in self.tokens[i..].iter().enumerate() {
730 match *token {
731 AnySequence | AnyRecursiveSequence => {
732 // ** must be at the start.
733 debug_assert!(match *token {
734 AnyRecursiveSequence => follows_separator,
735 _ => true,
736 });
737
738 // Empty match
739 match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) {
740 SubPatternDoesntMatch => (), // keep trying
741 m => return m,
742 };
743
744 while let Some(c) = file.next() {
745 if follows_separator && options.require_literal_leading_dot && c == '.' {
746 return SubPatternDoesntMatch;
747 }
748 follows_separator = path::is_separator(c);
749 match *token {
750 AnyRecursiveSequence if !follows_separator => continue,
751 AnySequence
752 if options.require_literal_separator && follows_separator =>
753 {
754 return SubPatternDoesntMatch
755 }
756 _ => (),
757 }
758 match self.matches_from(
759 follows_separator,
760 file.clone(),
761 i + ti + 1,
762 options,
763 ) {
764 SubPatternDoesntMatch => (), // keep trying
765 m => return m,
766 }
767 }
768 }
769 _ => {
770 let c = match file.next() {
771 Some(c) => c,
772 None => return EntirePatternDoesntMatch,
773 };
774
775 let is_sep = path::is_separator(c);
776
777 if !match *token {
778 AnyChar | AnyWithin(..) | AnyExcept(..)
779 if (options.require_literal_separator && is_sep)
780 || (follows_separator
781 && options.require_literal_leading_dot
782 && c == '.') =>
783 {
784 false
785 }
786 AnyChar => true,
787 AnyWithin(ref specifiers) => in_char_specifiers(&specifiers, c, options),
788 AnyExcept(ref specifiers) => !in_char_specifiers(&specifiers, c, options),
789 Char(c2) => chars_eq(c, c2, options.case_sensitive),
790 AnySequence | AnyRecursiveSequence => unreachable!(),
791 } {
792 return SubPatternDoesntMatch;
793 }
794 follows_separator = is_sep;
795 }
796 }
797 }
798
799 // Iter is fused.
800 if file.next().is_none() {
801 Match
802 } else {
803 SubPatternDoesntMatch
804 }
805 }
806}
807
808// Fills `todo` with paths under `path` to be matched by `patterns[idx]`,
809// special-casing patterns to match `.` and `..`, and avoiding `readdir()`
810// calls when there are no metacharacters in the pattern.
811fn fill_todo(
812 todo: &mut Vec<Result<(PathBuf, usize), GlobError>>,
813 patterns: &[Pattern],
814 idx: usize,
815 path: &Path,
816 options: MatchOptions,
817) {
818 // convert a pattern that's just many Char(_) to a string
819 fn pattern_as_str(pattern: &Pattern) -> Option<String> {
820 let mut s = String::new();
821 for token in &pattern.tokens {
822 match *token {
823 Char(c) => s.push(c),
824 _ => return None,
825 }
826 }
827
828 Some(s)
829 }
830
831 let add = |todo: &mut Vec<_>, next_path: PathBuf| {
832 if idx + 1 == patterns.len() {
833 // We know it's good, so don't make the iterator match this path
834 // against the pattern again. In particular, it can't match
835 // . or .. globs since these never show up as path components.
836 todo.push(Ok((next_path, !0 as usize)));
837 } else {
838 fill_todo(todo, patterns, idx + 1, &next_path, options);
839 }
840 };
841
842 let pattern = &patterns[idx];
843 let is_dir = is_dir(path);
844 let curdir = path == Path::new(".");
845 match pattern_as_str(pattern) {
846 Some(s) => {
847 // This pattern component doesn't have any metacharacters, so we
848 // don't need to read the current directory to know where to
849 // continue. So instead of passing control back to the iterator,
850 // we can just check for that one entry and potentially recurse
851 // right away.
852 let special = "." == s || ".." == s;
853 let next_path = if curdir {
854 PathBuf::from(s)
855 } else {
856 path.join(&s)
857 };
858 if (special && is_dir) || (!special && fs::metadata(&next_path).is_ok()) {
859 add(todo, next_path);
860 }
861 }
862 None if is_dir => {
863 let dirs = fs::read_dir(path).and_then(|d| {
864 d.map(|e| {
865 e.map(|e| {
866 if curdir {
867 PathBuf::from(e.path().file_name().unwrap())
868 } else {
869 e.path()
870 }
871 })
872 })
873 .collect::<Result<Vec<_>, _>>()
874 });
875 match dirs {
876 Ok(mut children) => {
877 children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name()));
878 todo.extend(children.into_iter().map(|x| Ok((x, idx))));
879
880 // Matching the special directory entries . and .. that
881 // refer to the current and parent directory respectively
882 // requires that the pattern has a leading dot, even if the
883 // `MatchOptions` field `require_literal_leading_dot` is not
884 // set.
885 if !pattern.tokens.is_empty() && pattern.tokens[0] == Char('.') {
886 for &special in &[".", ".."] {
887 if pattern.matches_with(special, options) {
888 add(todo, path.join(special));
889 }
890 }
891 }
892 }
893 Err(e) => {
894 todo.push(Err(GlobError {
895 path: path.to_path_buf(),
896 error: e,
897 }));
898 }
899 }
900 }
901 None => {
902 // not a directory, nothing more to find
903 }
904 }
905}
906
907fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> {
908 let mut cs: Vec = Vec::new();
909 let mut i: usize = 0;
910 while i < s.len() {
911 if i + 3 <= s.len() && s[i + 1] == '-' {
912 cs.push(CharRange(s[i], s[i + 2]));
913 i += 3;
914 } else {
915 cs.push(SingleChar(s[i]));
916 i += 1;
917 }
918 }
919 cs
920}
921
922fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool {
923 for &specifier in specifiers.iter() {
924 match specifier {
925 SingleChar(sc) => {
926 if chars_eq(c, sc, options.case_sensitive) {
927 return true;
928 }
929 }
930 CharRange(start, end) => {
931 // FIXME: work with non-ascii chars properly (issue #1347)
932 if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
933 let start = start.to_ascii_lowercase();
934 let end = end.to_ascii_lowercase();
935
936 let start_up = start.to_uppercase().next().unwrap();
937 let end_up = end.to_uppercase().next().unwrap();
938
939 // only allow case insensitive matching when
940 // both start and end are within a-z or A-Z
941 if start != start_up && end != end_up {
942 let c = c.to_ascii_lowercase();
943 if c >= start && c <= end {
944 return true;
945 }
946 }
947 }
948
949 if c >= start && c <= end {
950 return true;
951 }
952 }
953 }
954 }
955
956 false
957}
958
/// Compares two chars for equality, ignoring ASCII case when `case_sensitive`
/// is `false`. On Windows, any two path separators compare equal.
fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
    if cfg!(windows) && path::is_separator(a) && path::is_separator(b) {
        return true;
    }

    // FIXME: work with non-ascii chars properly (issue #9084)
    let fold_case = !case_sensitive && a.is_ascii() && b.is_ascii();
    if fold_case {
        a.to_ascii_lowercase() == b.to_ascii_lowercase()
    } else {
        a == b
    }
}
970
/// Configuration options to modify the behaviour of `Pattern::matches_with(..)`.
///
/// Note that the derived `Default` sets every field to `false`, whereas
/// `MatchOptions::new()` sets `case_sensitive` to `true`.
#[allow(missing_copy_implementations)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct MatchOptions {
    /// Whether or not patterns should be matched in a case-sensitive manner.
    /// This currently only considers upper/lower case relationships between
    /// ASCII characters, but in future this might be extended to work with
    /// Unicode.
    pub case_sensitive: bool,

    /// Whether or not path-component separator characters (e.g. `/` on
    /// Posix) must be matched by a literal `/`, rather than by `*` or `?` or
    /// `[...]`.
    pub require_literal_separator: bool,

    /// Whether or not paths that contain components that start with a `.`
    /// will require that `.` appears literally in the pattern; `*`, `?`, `**`,
    /// or `[...]` will not match. This is useful because such files are
    /// conventionally considered hidden on Unix systems and it might be
    /// desirable to skip them when listing files.
    pub require_literal_leading_dot: bool,
}
993
994impl MatchOptions {
995 /// Constructs a new `MatchOptions` with default field values. This is used
996 /// when calling functions that do not take an explicit `MatchOptions`
997 /// parameter.
998 ///
999 /// This function always returns this value:
1000 ///
1001 /// ```rust,ignore
1002 /// MatchOptions {
1003 /// case_sensitive: true,
1004 /// require_literal_separator: false,
1005 /// require_literal_leading_dot: false
1006 /// }
1007 /// ```
1008 ///
1009 /// # Note
1010 /// The behavior of this method doesn't match `default()`'s. This returns
1011 /// `case_sensitive` as `true` while `default()` does it as `false`.
1012 // FIXME: Consider unity the behavior with `default()` in a next major release.
1013 pub fn new() -> Self {
1014 Self {
1015 case_sensitive: true,
1016 require_literal_separator: false,
1017 require_literal_leading_dot: false,
1018 }
1019 }
1020}
1021
1022#[cfg(test)]
1023mod test {
1024 use super::{glob, MatchOptions, Pattern};
1025 use std::path::Path;
1026
1027 #[test]
1028 fn test_pattern_from_str() {
1029 assert!("a*b".parse::<Pattern>().unwrap().matches("a_b"));
1030 assert!("a/**b".parse::<Pattern>().unwrap_err().pos == 4);
1031 }
1032
1033 #[test]
1034 fn test_wildcard_errors() {
1035 assert!(Pattern::new("a/**b").unwrap_err().pos == 4);
1036 assert!(Pattern::new("a/bc**").unwrap_err().pos == 3);
1037 assert!(Pattern::new("a/*****").unwrap_err().pos == 4);
1038 assert!(Pattern::new("a/b**c**d").unwrap_err().pos == 2);
1039 assert!(Pattern::new("a**b").unwrap_err().pos == 0);
1040 }
1041
1042 #[test]
1043 fn test_unclosed_bracket_errors() {
1044 assert!(Pattern::new("abc[def").unwrap_err().pos == 3);
1045 assert!(Pattern::new("abc[!def").unwrap_err().pos == 3);
1046 assert!(Pattern::new("abc[").unwrap_err().pos == 3);
1047 assert!(Pattern::new("abc[!").unwrap_err().pos == 3);
1048 assert!(Pattern::new("abc[d").unwrap_err().pos == 3);
1049 assert!(Pattern::new("abc[!d").unwrap_err().pos == 3);
1050 assert!(Pattern::new("abc[]").unwrap_err().pos == 3);
1051 assert!(Pattern::new("abc[!]").unwrap_err().pos == 3);
1052 }
1053
1054 #[test]
1055 fn test_glob_errors() {
1056 assert!(glob("a/**b").err().unwrap().pos == 4);
1057 assert!(glob("abc[def").err().unwrap().pos == 3);
1058 }
1059
1060 // this test assumes that there is a /root directory and that
1061 // the user running this test is not root or otherwise doesn't
1062 // have permission to read its contents
1063 #[cfg(all(unix, not(target_os = "macos")))]
1064 #[test]
1065 fn test_iteration_errors() {
1066 use std::io;
1067 let mut iter = glob("/root/*").unwrap();
1068
1069 // GlobErrors shouldn't halt iteration
1070 let next = iter.next();
1071 assert!(next.is_some());
1072
1073 let err = next.unwrap();
1074 assert!(err.is_err());
1075
1076 let err = err.err().unwrap();
1077 assert!(err.path() == Path::new("/root"));
1078 assert!(err.error().kind() == io::ErrorKind::PermissionDenied);
1079 }
1080
1081 #[test]
1082 fn test_absolute_pattern() {
1083 assert!(glob("/").unwrap().next().is_some());
1084 assert!(glob("//").unwrap().next().is_some());
1085
1086 // assume that the filesystem is not empty!
1087 assert!(glob("/*").unwrap().next().is_some());
1088
1089 #[cfg(not(windows))]
1090 fn win() {}
1091
1092 #[cfg(windows)]
1093 fn win() {
1094 use std::env::current_dir;
1095 use std::path::Component;
1096
1097 // check windows absolute paths with host/device components
1098 let root_with_device = current_dir()
1099 .ok()
1100 .and_then(|p| {
1101 match p.components().next().unwrap() {
1102 Component::Prefix(prefix_component) => {
1103 let path = Path::new(prefix_component.as_os_str());
1104 path.join("*");
1105 Some(path.to_path_buf())
1106 }
1107 _ => panic!("no prefix in this path"),
1108 }
1109 })
1110 .unwrap();
1111 // FIXME (#9639): This needs to handle non-utf8 paths
1112 assert!(glob(root_with_device.as_os_str().to_str().unwrap())
1113 .unwrap()
1114 .next()
1115 .is_some());
1116 }
1117 win()
1118 }
1119
1120 #[test]
1121 fn test_wildcards() {
1122 assert!(Pattern::new("a*b").unwrap().matches("a_b"));
1123 assert!(Pattern::new("a*b*c").unwrap().matches("abc"));
1124 assert!(!Pattern::new("a*b*c").unwrap().matches("abcd"));
1125 assert!(Pattern::new("a*b*c").unwrap().matches("a_b_c"));
1126 assert!(Pattern::new("a*b*c").unwrap().matches("a___b___c"));
1127 assert!(Pattern::new("abc*abc*abc")
1128 .unwrap()
1129 .matches("abcabcabcabcabcabcabc"));
1130 assert!(!Pattern::new("abc*abc*abc")
1131 .unwrap()
1132 .matches("abcabcabcabcabcabcabca"));
1133 assert!(Pattern::new("a*a*a*a*a*a*a*a*a")
1134 .unwrap()
1135 .matches("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
1136 assert!(Pattern::new("a*b[xyz]c*d").unwrap().matches("abxcdbxcddd"));
1137 }
1138
1139 #[test]
1140 fn test_recursive_wildcards() {
1141 let pat = Pattern::new("some/**/needle.txt").unwrap();
1142 assert!(pat.matches("some/needle.txt"));
1143 assert!(pat.matches("some/one/needle.txt"));
1144 assert!(pat.matches("some/one/two/needle.txt"));
1145 assert!(pat.matches("some/other/needle.txt"));
1146 assert!(!pat.matches("some/other/notthis.txt"));
1147
1148 // a single ** should be valid, for globs
1149 // Should accept anything
1150 let pat = Pattern::new("**").unwrap();
1151 assert!(pat.is_recursive);
1152 assert!(pat.matches("abcde"));
1153 assert!(pat.matches(""));
1154 assert!(pat.matches(".asdf"));
1155 assert!(pat.matches("/x/.asdf"));
1156
1157 // collapse consecutive wildcards
1158 let pat = Pattern::new("some/**/**/needle.txt").unwrap();
1159 assert!(pat.matches("some/needle.txt"));
1160 assert!(pat.matches("some/one/needle.txt"));
1161 assert!(pat.matches("some/one/two/needle.txt"));
1162 assert!(pat.matches("some/other/needle.txt"));
1163 assert!(!pat.matches("some/other/notthis.txt"));
1164
1165 // ** can begin the pattern
1166 let pat = Pattern::new("**/test").unwrap();
1167 assert!(pat.matches("one/two/test"));
1168 assert!(pat.matches("one/test"));
1169 assert!(pat.matches("test"));
1170
1171 // /** can begin the pattern
1172 let pat = Pattern::new("/**/test").unwrap();
1173 assert!(pat.matches("/one/two/test"));
1174 assert!(pat.matches("/one/test"));
1175 assert!(pat.matches("/test"));
1176 assert!(!pat.matches("/one/notthis"));
1177 assert!(!pat.matches("/notthis"));
1178
1179 // Only start sub-patterns on start of path segment.
1180 let pat = Pattern::new("**/.*").unwrap();
1181 assert!(pat.matches(".abc"));
1182 assert!(pat.matches("abc/.abc"));
1183 assert!(!pat.matches("ab.c"));
1184 assert!(!pat.matches("abc/ab.c"));
1185 }
1186
1187 #[test]
1188 fn test_lots_of_files() {
1189 // this is a good test because it touches lots of differently named files
1190 glob("/*/*/*/*").unwrap().skip(10000).next();
1191 }
1192
1193 #[test]
1194 fn test_range_pattern() {
1195 let pat = Pattern::new("a[0-9]b").unwrap();
1196 for i in 0..10 {
1197 assert!(pat.matches(&format!("a{}b", i)));
1198 }
1199 assert!(!pat.matches("a_b"));
1200
1201 let pat = Pattern::new("a[!0-9]b").unwrap();
1202 for i in 0..10 {
1203 assert!(!pat.matches(&format!("a{}b", i)));
1204 }
1205 assert!(pat.matches("a_b"));
1206
1207 let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"];
1208 for &p in pats.iter() {
1209 let pat = Pattern::new(p).unwrap();
1210 for c in "abcdefghijklmnopqrstuvwxyz".chars() {
1211 assert!(pat.matches(&c.to_string()));
1212 }
1213 for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() {
1214 let options = MatchOptions {
1215 case_sensitive: false,
1216 ..MatchOptions::new()
1217 };
1218 assert!(pat.matches_with(&c.to_string(), options));
1219 }
1220 assert!(pat.matches("1"));
1221 assert!(pat.matches("2"));
1222 assert!(pat.matches("3"));
1223 }
1224
1225 let pats = ["[abc-]", "[-abc]", "[a-c-]"];
1226 for &p in pats.iter() {
1227 let pat = Pattern::new(p).unwrap();
1228 assert!(pat.matches("a"));
1229 assert!(pat.matches("b"));
1230 assert!(pat.matches("c"));
1231 assert!(pat.matches("-"));
1232 assert!(!pat.matches("d"));
1233 }
1234
1235 let pat = Pattern::new("[2-1]").unwrap();
1236 assert!(!pat.matches("1"));
1237 assert!(!pat.matches("2"));
1238
1239 assert!(Pattern::new("[-]").unwrap().matches("-"));
1240 assert!(!Pattern::new("[!-]").unwrap().matches("-"));
1241 }
1242
1243 #[test]
1244 fn test_pattern_matches() {
1245 let txt_pat = Pattern::new("*hello.txt").unwrap();
1246 assert!(txt_pat.matches("hello.txt"));
1247 assert!(txt_pat.matches("gareth_says_hello.txt"));
1248 assert!(txt_pat.matches("some/path/to/hello.txt"));
1249 assert!(txt_pat.matches("some\\path\\to\\hello.txt"));
1250 assert!(txt_pat.matches("/an/absolute/path/to/hello.txt"));
1251 assert!(!txt_pat.matches("hello.txt-and-then-some"));
1252 assert!(!txt_pat.matches("goodbye.txt"));
1253
1254 let dir_pat = Pattern::new("*some/path/to/hello.txt").unwrap();
1255 assert!(dir_pat.matches("some/path/to/hello.txt"));
1256 assert!(dir_pat.matches("a/bigger/some/path/to/hello.txt"));
1257 assert!(!dir_pat.matches("some/path/to/hello.txt-and-then-some"));
1258 assert!(!dir_pat.matches("some/other/path/to/hello.txt"));
1259 }
1260
1261 #[test]
1262 fn test_pattern_escape() {
1263 let s = "_[_]_?_*_!_";
1264 assert_eq!(Pattern::escape(s), "_[[]_[]]_[?]_[*]_!_".to_string());
1265 assert!(Pattern::new(&Pattern::escape(s)).unwrap().matches(s));
1266 }
1267
1268 #[test]
1269 fn test_pattern_matches_case_insensitive() {
1270 let pat = Pattern::new("aBcDeFg").unwrap();
1271 let options = MatchOptions {
1272 case_sensitive: false,
1273 require_literal_separator: false,
1274 require_literal_leading_dot: false,
1275 };
1276
1277 assert!(pat.matches_with("aBcDeFg", options));
1278 assert!(pat.matches_with("abcdefg", options));
1279 assert!(pat.matches_with("ABCDEFG", options));
1280 assert!(pat.matches_with("AbCdEfG", options));
1281 }
1282
1283 #[test]
1284 fn test_pattern_matches_case_insensitive_range() {
1285 let pat_within = Pattern::new("[a]").unwrap();
1286 let pat_except = Pattern::new("[!a]").unwrap();
1287
1288 let options_case_insensitive = MatchOptions {
1289 case_sensitive: false,
1290 require_literal_separator: false,
1291 require_literal_leading_dot: false,
1292 };
1293 let options_case_sensitive = MatchOptions {
1294 case_sensitive: true,
1295 require_literal_separator: false,
1296 require_literal_leading_dot: false,
1297 };
1298
1299 assert!(pat_within.matches_with("a", options_case_insensitive));
1300 assert!(pat_within.matches_with("A", options_case_insensitive));
1301 assert!(!pat_within.matches_with("A", options_case_sensitive));
1302
1303 assert!(!pat_except.matches_with("a", options_case_insensitive));
1304 assert!(!pat_except.matches_with("A", options_case_insensitive));
1305 assert!(pat_except.matches_with("A", options_case_sensitive));
1306 }
1307
1308 #[test]
1309 fn test_pattern_matches_require_literal_separator() {
1310 let options_require_literal = MatchOptions {
1311 case_sensitive: true,
1312 require_literal_separator: true,
1313 require_literal_leading_dot: false,
1314 };
1315 let options_not_require_literal = MatchOptions {
1316 case_sensitive: true,
1317 require_literal_separator: false,
1318 require_literal_leading_dot: false,
1319 };
1320
1321 assert!(Pattern::new("abc/def")
1322 .unwrap()
1323 .matches_with("abc/def", options_require_literal));
1324 assert!(!Pattern::new("abc?def")
1325 .unwrap()
1326 .matches_with("abc/def", options_require_literal));
1327 assert!(!Pattern::new("abc*def")
1328 .unwrap()
1329 .matches_with("abc/def", options_require_literal));
1330 assert!(!Pattern::new("abc[/]def")
1331 .unwrap()
1332 .matches_with("abc/def", options_require_literal));
1333
1334 assert!(Pattern::new("abc/def")
1335 .unwrap()
1336 .matches_with("abc/def", options_not_require_literal));
1337 assert!(Pattern::new("abc?def")
1338 .unwrap()
1339 .matches_with("abc/def", options_not_require_literal));
1340 assert!(Pattern::new("abc*def")
1341 .unwrap()
1342 .matches_with("abc/def", options_not_require_literal));
1343 assert!(Pattern::new("abc[/]def")
1344 .unwrap()
1345 .matches_with("abc/def", options_not_require_literal));
1346 }
1347
1348 #[test]
1349 fn test_pattern_matches_require_literal_leading_dot() {
1350 let options_require_literal_leading_dot = MatchOptions {
1351 case_sensitive: true,
1352 require_literal_separator: false,
1353 require_literal_leading_dot: true,
1354 };
1355 let options_not_require_literal_leading_dot = MatchOptions {
1356 case_sensitive: true,
1357 require_literal_separator: false,
1358 require_literal_leading_dot: false,
1359 };
1360
1361 let f = |options| {
1362 Pattern::new("*.txt")
1363 .unwrap()
1364 .matches_with(".hello.txt", options)
1365 };
1366 assert!(f(options_not_require_literal_leading_dot));
1367 assert!(!f(options_require_literal_leading_dot));
1368
1369 let f = |options| {
1370 Pattern::new(".*.*")
1371 .unwrap()
1372 .matches_with(".hello.txt", options)
1373 };
1374 assert!(f(options_not_require_literal_leading_dot));
1375 assert!(f(options_require_literal_leading_dot));
1376
1377 let f = |options| {
1378 Pattern::new("aaa/bbb/*")
1379 .unwrap()
1380 .matches_with("aaa/bbb/.ccc", options)
1381 };
1382 assert!(f(options_not_require_literal_leading_dot));
1383 assert!(!f(options_require_literal_leading_dot));
1384
1385 let f = |options| {
1386 Pattern::new("aaa/bbb/*")
1387 .unwrap()
1388 .matches_with("aaa/bbb/c.c.c.", options)
1389 };
1390 assert!(f(options_not_require_literal_leading_dot));
1391 assert!(f(options_require_literal_leading_dot));
1392
1393 let f = |options| {
1394 Pattern::new("aaa/bbb/.*")
1395 .unwrap()
1396 .matches_with("aaa/bbb/.ccc", options)
1397 };
1398 assert!(f(options_not_require_literal_leading_dot));
1399 assert!(f(options_require_literal_leading_dot));
1400
1401 let f = |options| {
1402 Pattern::new("aaa/?bbb")
1403 .unwrap()
1404 .matches_with("aaa/.bbb", options)
1405 };
1406 assert!(f(options_not_require_literal_leading_dot));
1407 assert!(!f(options_require_literal_leading_dot));
1408
1409 let f = |options| {
1410 Pattern::new("aaa/[.]bbb")
1411 .unwrap()
1412 .matches_with("aaa/.bbb", options)
1413 };
1414 assert!(f(options_not_require_literal_leading_dot));
1415 assert!(!f(options_require_literal_leading_dot));
1416
1417 let f = |options| Pattern::new("**/*").unwrap().matches_with(".bbb", options);
1418 assert!(f(options_not_require_literal_leading_dot));
1419 assert!(!f(options_require_literal_leading_dot));
1420 }
1421
1422 #[test]
1423 fn test_matches_path() {
1424 // on windows, (Path::new("a/b").as_str().unwrap() == "a\\b"), so this
1425 // tests that / and \ are considered equivalent on windows
1426 assert!(Pattern::new("a/b").unwrap().matches_path(&Path::new("a/b")));
1427 }
1428
1429 #[test]
1430 fn test_path_join() {
1431 let pattern = Path::new("one").join(&Path::new("**/*.rs"));
1432 assert!(Pattern::new(pattern.to_str().unwrap()).is_ok());
1433 }
1434}
1435