1 | // Copyright 2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | // option. This file may not be copied, modified, or distributed |
9 | // except according to those terms. |
10 | |
11 | //! Support for matching file paths against Unix shell style patterns. |
12 | //! |
13 | //! The `glob` and `glob_with` functions allow querying the filesystem for all |
14 | //! files that match a particular pattern (similar to the libc `glob` function). |
15 | //! The methods on the `Pattern` type provide functionality for checking if |
16 | //! individual paths match a particular pattern (similar to the libc `fnmatch` |
17 | //! function). |
18 | //! |
19 | //! For consistency across platforms, and for Windows support, this module |
20 | //! is implemented entirely in Rust rather than deferring to the libc |
21 | //! `glob`/`fnmatch` functions. |
22 | //! |
23 | //! # Examples |
24 | //! |
25 | //! To print all jpg files in `/media/` and all of its subdirectories. |
26 | //! |
27 | //! ```rust,no_run |
28 | //! use glob::glob; |
29 | //! |
30 | //! for entry in glob("/media/**/*.jpg" ).expect("Failed to read glob pattern" ) { |
31 | //! match entry { |
32 | //! Ok(path) => println!("{:?}" , path.display()), |
33 | //! Err(e) => println!("{:?}" , e), |
34 | //! } |
35 | //! } |
36 | //! ``` |
37 | //! |
38 | //! To print all files containing the letter "a", case insensitive, in a `local` |
39 | //! directory relative to the current working directory. This ignores errors |
40 | //! instead of printing them. |
41 | //! |
42 | //! ```rust,no_run |
43 | //! use glob::glob_with; |
44 | //! use glob::MatchOptions; |
45 | //! |
46 | //! let options = MatchOptions { |
47 | //! case_sensitive: false, |
48 | //! require_literal_separator: false, |
49 | //! require_literal_leading_dot: false, |
50 | //! }; |
51 | //! for entry in glob_with("local/*a*" , options).unwrap() { |
52 | //! if let Ok(path) = entry { |
53 | //! println!("{:?}" , path.display()) |
54 | //! } |
55 | //! } |
56 | //! ``` |
57 | |
58 | #![doc ( |
59 | html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png" , |
60 | html_favicon_url = "https://www.rust-lang.org/favicon.ico" , |
61 | html_root_url = "https://docs.rs/glob/0.3.1" |
62 | )] |
63 | #![deny (missing_docs)] |
64 | |
65 | #[cfg (test)] |
66 | #[macro_use ] |
67 | extern crate doc_comment; |
68 | |
69 | #[cfg (test)] |
70 | doctest!("../README.md" ); |
71 | |
72 | use std::cmp; |
73 | use std::error::Error; |
74 | use std::fmt; |
75 | use std::fs; |
76 | use std::io; |
77 | use std::path::{self, Component, Path, PathBuf}; |
78 | use std::str::FromStr; |
79 | |
80 | use CharSpecifier::{CharRange, SingleChar}; |
81 | use MatchResult::{EntirePatternDoesntMatch, Match, SubPatternDoesntMatch}; |
82 | use PatternToken::AnyExcept; |
83 | use PatternToken::{AnyChar, AnyRecursiveSequence, AnySequence, AnyWithin, Char}; |
84 | |
/// An iterator that yields `Path`s from the filesystem that match a particular
/// pattern.
///
/// Note that it yields `GlobResult` in order to report any `IoErrors` that may
/// arise during iteration. If a directory matches but is unreadable,
/// thereby preventing its contents from being checked for matches, a
/// `GlobError` is returned to express this.
///
/// See the `glob` function for more details.
#[derive(Debug)]
pub struct Paths {
    // One compiled pattern per path component of the glob, in order.
    dir_patterns: Vec<Pattern>,
    // Set by `glob_with` when the glob ends in a path separator; in that case
    // only directories are yielded.
    require_dir: bool,
    // Match options forwarded to `fill_todo` and `Pattern::matches_with`.
    options: MatchOptions,
    // Stack of pending work: either a candidate path together with the index
    // into `dir_patterns` it still has to be matched against, or an error
    // that is reported when it is popped.
    todo: Vec<Result<(PathBuf, usize), GlobError>>,
    // Directory the search starts from. It is taken (left as `None`) by the
    // first call to `next()`, which is when `todo` gets its initial contents.
    scope: Option<PathBuf>,
}
102 | |
/// Return an iterator that produces all the `Path`s that match the given
/// pattern using default match options, which may be absolute or relative to
/// the current working directory.
///
/// This may return an error if the pattern is invalid.
///
/// This method uses the default match options and is equivalent to calling
/// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you
/// want to use non-default match options.
///
/// When iterating, each result is a `GlobResult` which expresses the
/// possibility that there was an `IoError` when attempting to read the contents
/// of the matched path. In other words, each item returned by the iterator
/// will either be an `Ok(Path)` if the path matched, or an `Err(GlobError)` if
/// the path (partially) matched _but_ its contents could not be read in order
/// to determine if its contents matched.
///
/// See the `Paths` documentation for more information.
///
/// # Examples
///
/// Consider a directory `/media/pictures` containing only the files
/// `kittens.jpg`, `puppies.jpg` and `hamsters.gif`:
///
/// ```rust,no_run
/// use glob::glob;
///
/// for entry in glob("/media/pictures/*.jpg").unwrap() {
///     match entry {
///         Ok(path) => println!("{:?}", path.display()),
///
///         // if the path matched but was unreadable,
///         // thereby preventing its contents from matching
///         Err(e) => println!("{:?}", e),
///     }
/// }
/// ```
///
/// The above code will print:
///
/// ```ignore
/// /media/pictures/kittens.jpg
/// /media/pictures/puppies.jpg
/// ```
///
/// If you want to ignore unreadable paths, you can use something like
/// `filter_map`:
///
/// ```rust
/// use glob::glob;
/// use std::result::Result;
///
/// for path in glob("/media/pictures/*.jpg").unwrap().filter_map(Result::ok) {
///     println!("{}", path.display());
/// }
/// ```
///
/// Paths are yielded in alphabetical order.
pub fn glob(pattern: &str) -> Result<Paths, PatternError> {
    glob_with(pattern, MatchOptions::new())
}
163 | |
164 | /// Return an iterator that produces all the `Path`s that match the given |
165 | /// pattern using the specified match options, which may be absolute or relative |
166 | /// to the current working directory. |
167 | /// |
168 | /// This may return an error if the pattern is invalid. |
169 | /// |
170 | /// This function accepts Unix shell style patterns as described by |
171 | /// `Pattern::new(..)`. The options given are passed through unchanged to |
172 | /// `Pattern::matches_with(..)` with the exception that |
173 | /// `require_literal_separator` is always set to `true` regardless of the value |
174 | /// passed to this function. |
175 | /// |
176 | /// Paths are yielded in alphabetical order. |
177 | pub fn glob_with(pattern: &str, options: MatchOptions) -> Result<Paths, PatternError> { |
178 | #[cfg (windows)] |
179 | fn check_windows_verbatim(p: &Path) -> bool { |
180 | match p.components().next() { |
181 | Some(Component::Prefix(ref p)) => p.kind().is_verbatim(), |
182 | _ => false, |
183 | } |
184 | } |
185 | #[cfg (not(windows))] |
186 | fn check_windows_verbatim(_: &Path) -> bool { |
187 | false |
188 | } |
189 | |
190 | #[cfg (windows)] |
191 | fn to_scope(p: &Path) -> PathBuf { |
192 | // FIXME handle volume relative paths here |
193 | p.to_path_buf() |
194 | } |
195 | #[cfg (not(windows))] |
196 | fn to_scope(p: &Path) -> PathBuf { |
197 | p.to_path_buf() |
198 | } |
199 | |
200 | // make sure that the pattern is valid first, else early return with error |
201 | if let Err(err) = Pattern::new(pattern) { |
202 | return Err(err); |
203 | } |
204 | |
205 | let mut components = Path::new(pattern).components().peekable(); |
206 | loop { |
207 | match components.peek() { |
208 | Some(&Component::Prefix(..)) | Some(&Component::RootDir) => { |
209 | components.next(); |
210 | } |
211 | _ => break, |
212 | } |
213 | } |
214 | let rest = components.map(|s| s.as_os_str()).collect::<PathBuf>(); |
215 | let normalized_pattern = Path::new(pattern).iter().collect::<PathBuf>(); |
216 | let root_len = normalized_pattern.to_str().unwrap().len() - rest.to_str().unwrap().len(); |
217 | let root = if root_len > 0 { |
218 | Some(Path::new(&pattern[..root_len])) |
219 | } else { |
220 | None |
221 | }; |
222 | |
223 | if root_len > 0 && check_windows_verbatim(root.unwrap()) { |
224 | // FIXME: How do we want to handle verbatim paths? I'm inclined to |
225 | // return nothing, since we can't very well find all UNC shares with a |
226 | // 1-letter server name. |
227 | return Ok(Paths { |
228 | dir_patterns: Vec::new(), |
229 | require_dir: false, |
230 | options, |
231 | todo: Vec::new(), |
232 | scope: None, |
233 | }); |
234 | } |
235 | |
236 | let scope = root.map_or_else(|| PathBuf::from("." ), to_scope); |
237 | |
238 | let mut dir_patterns = Vec::new(); |
239 | let components = |
240 | pattern[cmp::min(root_len, pattern.len())..].split_terminator(path::is_separator); |
241 | |
242 | for component in components { |
243 | dir_patterns.push(Pattern::new(component)?); |
244 | } |
245 | |
246 | if root_len == pattern.len() { |
247 | dir_patterns.push(Pattern { |
248 | original: "" .to_string(), |
249 | tokens: Vec::new(), |
250 | is_recursive: false, |
251 | }); |
252 | } |
253 | |
254 | let last_is_separator = pattern.chars().next_back().map(path::is_separator); |
255 | let require_dir = last_is_separator == Some(true); |
256 | let todo = Vec::new(); |
257 | |
258 | Ok(Paths { |
259 | dir_patterns, |
260 | require_dir, |
261 | options, |
262 | todo, |
263 | scope: Some(scope), |
264 | }) |
265 | } |
266 | |
/// A glob iteration error.
///
/// This is typically returned when a particular path cannot be read
/// to determine if its contents match the glob pattern. This is possible
/// if the program lacks the appropriate permissions, for example.
#[derive(Debug)]
pub struct GlobError {
    // The path whose contents could not be read.
    path: PathBuf,
    // The underlying I/O error reported for `path`.
    error: io::Error,
}
277 | |
// Read-only accessors for the two parts of a `GlobError`.
impl GlobError {
    /// The Path that the error corresponds to.
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// The error in question.
    pub fn error(&self) -> &io::Error {
        &self.error
    }

    /// Consumes self, returning the _raw_ underlying `io::Error`
    pub fn into_error(self) -> io::Error {
        self.error
    }
}
294 | |
// `std::error::Error` support: both methods simply forward to the wrapped
// `io::Error`.
impl Error for GlobError {
    // Forwards to the wrapped error's description; the callee is deprecated,
    // hence the `allow`.
    #[allow(deprecated)]
    fn description(&self) -> &str {
        self.error.description()
    }

    // Exposes the wrapped `io::Error` as the cause. The return type is written
    // without `dyn`, so the corresponding lint is silenced (together with
    // `unknown_lints`, which covers compilers that do not know that lint).
    #[allow(unknown_lints, bare_trait_objects)]
    fn cause(&self) -> Option<&Error> {
        Some(&self.error)
    }
}
306 | |
// Human-readable rendering: names the offending path followed by the
// underlying I/O error.
impl fmt::Display for GlobError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "attempting to read `{}` resulted in an error: {}",
            self.path.display(),
            self.error
        )
    }
}
317 | |
/// Reports whether `p` refers to a directory.
///
/// Any failure to query the metadata of `p` is treated as "not a directory".
fn is_dir(p: &Path) -> bool {
    match fs::metadata(p) {
        Ok(meta) => meta.is_dir(),
        Err(_) => false,
    }
}
321 | |
/// An alias for a glob iteration result.
///
/// This represents either a matched path (`Ok(PathBuf)`) or a glob iteration
/// error (`Err(GlobError)`), such as failing to read a particular directory's
/// contents.
pub type GlobResult = Result<PathBuf, GlobError>;
327 | |
// Drives the directory walk: `todo` is used as a stack of
// `(path, pattern index)` pairs that still need to be examined.
impl Iterator for Paths {
    type Item = GlobResult;

    fn next(&mut self) -> Option<GlobResult> {
        // The todo buffer hasn't been initialized yet, so it's done at this
        // point rather than in glob() so that the errors are unified: failing
        // to fill the buffer is an iteration error, whereas construction of
        // the iterator (i.e. glob()) only fails if it fails to compile the
        // Pattern.
        if let Some(scope) = self.scope.take() {
            if !self.dir_patterns.is_empty() {
                // Shouldn't happen, but we're using -1 as a special index.
                assert!(self.dir_patterns.len() < !0 as usize);

                fill_todo(&mut self.todo, &self.dir_patterns, 0, &scope, self.options);
            }
        }

        loop {
            // Nothing to match against, or no pending work left: done.
            if self.dir_patterns.is_empty() || self.todo.is_empty() {
                return None;
            }

            // Errors queued by `fill_todo` are reported as soon as they are
            // popped.
            let (path, mut idx) = match self.todo.pop().unwrap() {
                Ok(pair) => pair,
                Err(e) => return Some(Err(e)),
            };

            // idx -1: was already checked by fill_todo, maybe path was '.' or
            // '..' that we can't match here because of normalization.
            if idx == !0 as usize {
                if self.require_dir && !is_dir(&path) {
                    continue;
                }
                return Some(Ok(path));
            }

            if self.dir_patterns[idx].is_recursive {
                let mut next = idx;

                // collapse consecutive recursive patterns
                while (next + 1) < self.dir_patterns.len()
                    && self.dir_patterns[next + 1].is_recursive
                {
                    next += 1;
                }

                if is_dir(&path) {
                    // the path is a directory, so it's a match

                    // push this directory's contents
                    fill_todo(
                        &mut self.todo,
                        &self.dir_patterns,
                        next,
                        &path,
                        self.options,
                    );

                    if next == self.dir_patterns.len() - 1 {
                        // pattern ends in recursive pattern, so return this
                        // directory as a result
                        return Some(Ok(path));
                    } else {
                        // advanced to the next pattern for this path
                        idx = next + 1;
                    }
                } else if next == self.dir_patterns.len() - 1 {
                    // not a directory and it's the last pattern, meaning no
                    // match
                    continue;
                } else {
                    // advanced to the next pattern for this path
                    idx = next + 1;
                }
            }

            // not recursive, so match normally: only the final component of
            // `path` is compared against the pattern at `idx`
            if self.dir_patterns[idx].matches_with(
                {
                    match path.file_name().and_then(|s| s.to_str()) {
                        // FIXME (#9639): How do we handle non-utf8 filenames?
                        // Ignore them for now; ideally we'd still match them
                        // against a *
                        None => continue,
                        Some(x) => x,
                    }
                },
                self.options,
            ) {
                if idx == self.dir_patterns.len() - 1 {
                    // it is not possible for a pattern to match a directory
                    // *AND* its children so we don't need to check the
                    // children

                    if !self.require_dir || is_dir(&path) {
                        return Some(Ok(path));
                    }
                } else {
                    // more pattern components remain: queue the work for the
                    // next component below this path
                    fill_todo(
                        &mut self.todo,
                        &self.dir_patterns,
                        idx + 1,
                        &path,
                        self.options,
                    );
                }
            }
        }
    }
}
438 | |
/// A pattern parsing error.
///
/// Produced by `Pattern::new` when a glob pattern cannot be compiled.
#[derive(Debug)]
#[allow(missing_copy_implementations)]
pub struct PatternError {
    /// The approximate character index of where the error occurred.
    pub pos: usize,

    /// A message describing the error.
    pub msg: &'static str,
}
449 | |
// `std::error::Error` support for pattern parsing errors.
impl Error for PatternError {
    // Returns the static message stored in `msg`.
    fn description(&self) -> &str {
        self.msg
    }
}
455 | |
// Human-readable rendering: reports the position followed by the message.
impl fmt::Display for PatternError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "Pattern syntax error near position {}: {}",
            self.pos, self.msg
        )
    }
}
465 | |
/// A compiled Unix shell style pattern.
///
/// - `?` matches any single character.
///
/// - `*` matches any (possibly empty) sequence of characters.
///
/// - `**` matches the current directory and arbitrary subdirectories. This
///   sequence **must** form a single path component, so both `**a` and `b**`
///   are invalid and will result in an error.  A sequence of more than two
///   consecutive `*` characters is also invalid.
///
/// - `[...]` matches any character inside the brackets.  Character sequences
///   can also specify ranges of characters, as ordered by Unicode, so e.g.
///   `[0-9]` specifies any character between 0 and 9 inclusive. An unclosed
///   bracket is invalid.
///
/// - `[!...]` is the negation of `[...]`, i.e. it matches any characters
///   **not** in the brackets.
///
/// - The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets
///   (e.g. `[?]`).  When a `]` occurs immediately following `[` or `[!` then it
///   is interpreted as being part of, rather than ending, the character set, so
///   `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively.  The `-`
///   character can be specified inside a character sequence pattern by placing
///   it at the start or the end, e.g. `[abc-]`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub struct Pattern {
    // The pattern text exactly as it was passed to `Pattern::new`.
    original: String,
    // The compiled form of `original`, one token per pattern element.
    tokens: Vec<PatternToken>,
    // Set by `Pattern::new` when it emits an `AnyRecursiveSequence` (`**`)
    // token.
    is_recursive: bool,
}
497 | |
/// Show the original glob pattern.
impl fmt::Display for Pattern {
    // Delegates to the `Display` implementation of the stored pattern text.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.original.fmt(f)
    }
}
504 | |
// Allows `"...".parse::<Pattern>()`; equivalent to calling `Pattern::new`.
impl FromStr for Pattern {
    type Err = PatternError;

    fn from_str(s: &str) -> Result<Self, PatternError> {
        Self::new(s)
    }
}
512 | |
// One element of a compiled `Pattern`, as produced by `Pattern::new`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum PatternToken {
    // A literal character.
    Char(char),
    // `?`
    AnyChar,
    // `*`
    AnySequence,
    // `**`
    AnyRecursiveSequence,
    // `[...]`: any character covered by one of the specifiers.
    AnyWithin(Vec<CharSpecifier>),
    // `[!...]`: any character covered by none of the specifiers.
    AnyExcept(Vec<CharSpecifier>),
}
522 | |
// One entry of a bracket expression (`[...]` / `[!...]`), as produced by
// `parse_char_specifiers`.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum CharSpecifier {
    // A single character, e.g. the `a` in `[abc]`.
    SingleChar(char),
    // A range written as `start-end`; both ends are included when matching.
    CharRange(char, char),
}
528 | |
// Outcome of `Pattern::matches_from`.
#[derive(Copy, Clone, PartialEq)]
enum MatchResult {
    // The remaining tokens matched the remaining input exactly.
    Match,
    // This attempt failed; an enclosing wildcard keeps trying other lengths.
    SubPatternDoesntMatch,
    // The input ran out while a non-wildcard token was still pending;
    // enclosing wildcards return this result unchanged instead of retrying.
    EntirePatternDoesntMatch,
}
535 | |
// Messages carried by the `PatternError`s that `Pattern::new` returns.

// More than two consecutive `*` characters.
const ERROR_WILDCARDS: &str = "wildcards are either regular `*` or recursive `**`";
// `**` that is not a complete path component.
const ERROR_RECURSIVE_WILDCARDS: &str = "recursive wildcards must form a single path \
                                         component";
// A `[` that does not start a well-formed bracket expression.
const ERROR_INVALID_RANGE: &str = "invalid range pattern";
540 | |
541 | impl Pattern { |
542 | /// This function compiles Unix shell style patterns. |
543 | /// |
544 | /// An invalid glob pattern will yield a `PatternError`. |
545 | pub fn new(pattern: &str) -> Result<Self, PatternError> { |
546 | let chars = pattern.chars().collect::<Vec<_>>(); |
547 | let mut tokens = Vec::new(); |
548 | let mut is_recursive = false; |
549 | let mut i = 0; |
550 | |
551 | while i < chars.len() { |
552 | match chars[i] { |
553 | '?' => { |
554 | tokens.push(AnyChar); |
555 | i += 1; |
556 | } |
557 | '*' => { |
558 | let old = i; |
559 | |
560 | while i < chars.len() && chars[i] == '*' { |
561 | i += 1; |
562 | } |
563 | |
564 | let count = i - old; |
565 | |
566 | if count > 2 { |
567 | return Err(PatternError { |
568 | pos: old + 2, |
569 | msg: ERROR_WILDCARDS, |
570 | }); |
571 | } else if count == 2 { |
572 | // ** can only be an entire path component |
573 | // i.e. a/**/b is valid, but a**/b or a/**b is not |
574 | // invalid matches are treated literally |
575 | let is_valid = if i == 2 || path::is_separator(chars[i - count - 1]) { |
576 | // it ends in a '/' |
577 | if i < chars.len() && path::is_separator(chars[i]) { |
578 | i += 1; |
579 | true |
580 | // or the pattern ends here |
581 | // this enables the existing globbing mechanism |
582 | } else if i == chars.len() { |
583 | true |
584 | // `**` ends in non-separator |
585 | } else { |
586 | return Err(PatternError { |
587 | pos: i, |
588 | msg: ERROR_RECURSIVE_WILDCARDS, |
589 | }); |
590 | } |
591 | // `**` begins with non-separator |
592 | } else { |
593 | return Err(PatternError { |
594 | pos: old - 1, |
595 | msg: ERROR_RECURSIVE_WILDCARDS, |
596 | }); |
597 | }; |
598 | |
599 | if is_valid { |
600 | // collapse consecutive AnyRecursiveSequence to a |
601 | // single one |
602 | |
603 | let tokens_len = tokens.len(); |
604 | |
605 | if !(tokens_len > 1 && tokens[tokens_len - 1] == AnyRecursiveSequence) { |
606 | is_recursive = true; |
607 | tokens.push(AnyRecursiveSequence); |
608 | } |
609 | } |
610 | } else { |
611 | tokens.push(AnySequence); |
612 | } |
613 | } |
614 | '[' => { |
615 | if i + 4 <= chars.len() && chars[i + 1] == '!' { |
616 | match chars[i + 3..].iter().position(|x| *x == ']' ) { |
617 | None => (), |
618 | Some(j) => { |
619 | let chars = &chars[i + 2..i + 3 + j]; |
620 | let cs = parse_char_specifiers(chars); |
621 | tokens.push(AnyExcept(cs)); |
622 | i += j + 4; |
623 | continue; |
624 | } |
625 | } |
626 | } else if i + 3 <= chars.len() && chars[i + 1] != '!' { |
627 | match chars[i + 2..].iter().position(|x| *x == ']' ) { |
628 | None => (), |
629 | Some(j) => { |
630 | let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]); |
631 | tokens.push(AnyWithin(cs)); |
632 | i += j + 3; |
633 | continue; |
634 | } |
635 | } |
636 | } |
637 | |
638 | // if we get here then this is not a valid range pattern |
639 | return Err(PatternError { |
640 | pos: i, |
641 | msg: ERROR_INVALID_RANGE, |
642 | }); |
643 | } |
644 | c => { |
645 | tokens.push(Char(c)); |
646 | i += 1; |
647 | } |
648 | } |
649 | } |
650 | |
651 | Ok(Self { |
652 | tokens, |
653 | original: pattern.to_string(), |
654 | is_recursive, |
655 | }) |
656 | } |
657 | |
658 | /// Escape metacharacters within the given string by surrounding them in |
659 | /// brackets. The resulting string will, when compiled into a `Pattern`, |
660 | /// match the input string and nothing else. |
661 | pub fn escape(s: &str) -> String { |
662 | let mut escaped = String::new(); |
663 | for c in s.chars() { |
664 | match c { |
665 | // note that ! does not need escaping because it is only special |
666 | // inside brackets |
667 | '?' | '*' | '[' | ']' => { |
668 | escaped.push('[' ); |
669 | escaped.push(c); |
670 | escaped.push(']' ); |
671 | } |
672 | c => { |
673 | escaped.push(c); |
674 | } |
675 | } |
676 | } |
677 | escaped |
678 | } |
679 | |
680 | /// Return if the given `str` matches this `Pattern` using the default |
681 | /// match options (i.e. `MatchOptions::new()`). |
682 | /// |
683 | /// # Examples |
684 | /// |
685 | /// ```rust |
686 | /// use glob::Pattern; |
687 | /// |
688 | /// assert!(Pattern::new("c?t" ).unwrap().matches("cat" )); |
689 | /// assert!(Pattern::new("k[!e]tteh" ).unwrap().matches("kitteh" )); |
690 | /// assert!(Pattern::new("d*g" ).unwrap().matches("doog" )); |
691 | /// ``` |
692 | pub fn matches(&self, str: &str) -> bool { |
693 | self.matches_with(str, MatchOptions::new()) |
694 | } |
695 | |
696 | /// Return if the given `Path`, when converted to a `str`, matches this |
697 | /// `Pattern` using the default match options (i.e. `MatchOptions::new()`). |
698 | pub fn matches_path(&self, path: &Path) -> bool { |
699 | // FIXME (#9639): This needs to handle non-utf8 paths |
700 | path.to_str().map_or(false, |s| self.matches(s)) |
701 | } |
702 | |
703 | /// Return if the given `str` matches this `Pattern` using the specified |
704 | /// match options. |
705 | pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool { |
706 | self.matches_from(true, str.chars(), 0, options) == Match |
707 | } |
708 | |
709 | /// Return if the given `Path`, when converted to a `str`, matches this |
710 | /// `Pattern` using the specified match options. |
711 | pub fn matches_path_with(&self, path: &Path, options: MatchOptions) -> bool { |
712 | // FIXME (#9639): This needs to handle non-utf8 paths |
713 | path.to_str() |
714 | .map_or(false, |s| self.matches_with(s, options)) |
715 | } |
716 | |
717 | /// Access the original glob pattern. |
718 | pub fn as_str(&self) -> &str { |
719 | &self.original |
720 | } |
721 | |
722 | fn matches_from( |
723 | &self, |
724 | mut follows_separator: bool, |
725 | mut file: std::str::Chars, |
726 | i: usize, |
727 | options: MatchOptions, |
728 | ) -> MatchResult { |
729 | for (ti, token) in self.tokens[i..].iter().enumerate() { |
730 | match *token { |
731 | AnySequence | AnyRecursiveSequence => { |
732 | // ** must be at the start. |
733 | debug_assert!(match *token { |
734 | AnyRecursiveSequence => follows_separator, |
735 | _ => true, |
736 | }); |
737 | |
738 | // Empty match |
739 | match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) { |
740 | SubPatternDoesntMatch => (), // keep trying |
741 | m => return m, |
742 | }; |
743 | |
744 | while let Some(c) = file.next() { |
745 | if follows_separator && options.require_literal_leading_dot && c == '.' { |
746 | return SubPatternDoesntMatch; |
747 | } |
748 | follows_separator = path::is_separator(c); |
749 | match *token { |
750 | AnyRecursiveSequence if !follows_separator => continue, |
751 | AnySequence |
752 | if options.require_literal_separator && follows_separator => |
753 | { |
754 | return SubPatternDoesntMatch |
755 | } |
756 | _ => (), |
757 | } |
758 | match self.matches_from( |
759 | follows_separator, |
760 | file.clone(), |
761 | i + ti + 1, |
762 | options, |
763 | ) { |
764 | SubPatternDoesntMatch => (), // keep trying |
765 | m => return m, |
766 | } |
767 | } |
768 | } |
769 | _ => { |
770 | let c = match file.next() { |
771 | Some(c) => c, |
772 | None => return EntirePatternDoesntMatch, |
773 | }; |
774 | |
775 | let is_sep = path::is_separator(c); |
776 | |
777 | if !match *token { |
778 | AnyChar | AnyWithin(..) | AnyExcept(..) |
779 | if (options.require_literal_separator && is_sep) |
780 | || (follows_separator |
781 | && options.require_literal_leading_dot |
782 | && c == '.' ) => |
783 | { |
784 | false |
785 | } |
786 | AnyChar => true, |
787 | AnyWithin(ref specifiers) => in_char_specifiers(&specifiers, c, options), |
788 | AnyExcept(ref specifiers) => !in_char_specifiers(&specifiers, c, options), |
789 | Char(c2) => chars_eq(c, c2, options.case_sensitive), |
790 | AnySequence | AnyRecursiveSequence => unreachable!(), |
791 | } { |
792 | return SubPatternDoesntMatch; |
793 | } |
794 | follows_separator = is_sep; |
795 | } |
796 | } |
797 | } |
798 | |
799 | // Iter is fused. |
800 | if file.next().is_none() { |
801 | Match |
802 | } else { |
803 | SubPatternDoesntMatch |
804 | } |
805 | } |
806 | } |
807 | |
808 | // Fills `todo` with paths under `path` to be matched by `patterns[idx]`, |
809 | // special-casing patterns to match `.` and `..`, and avoiding `readdir()` |
810 | // calls when there are no metacharacters in the pattern. |
811 | fn fill_todo( |
812 | todo: &mut Vec<Result<(PathBuf, usize), GlobError>>, |
813 | patterns: &[Pattern], |
814 | idx: usize, |
815 | path: &Path, |
816 | options: MatchOptions, |
817 | ) { |
818 | // convert a pattern that's just many Char(_) to a string |
819 | fn pattern_as_str(pattern: &Pattern) -> Option<String> { |
820 | let mut s = String::new(); |
821 | for token in &pattern.tokens { |
822 | match *token { |
823 | Char(c) => s.push(c), |
824 | _ => return None, |
825 | } |
826 | } |
827 | |
828 | Some(s) |
829 | } |
830 | |
831 | let add = |todo: &mut Vec<_>, next_path: PathBuf| { |
832 | if idx + 1 == patterns.len() { |
833 | // We know it's good, so don't make the iterator match this path |
834 | // against the pattern again. In particular, it can't match |
835 | // . or .. globs since these never show up as path components. |
836 | todo.push(Ok((next_path, !0 as usize))); |
837 | } else { |
838 | fill_todo(todo, patterns, idx + 1, &next_path, options); |
839 | } |
840 | }; |
841 | |
842 | let pattern = &patterns[idx]; |
843 | let is_dir = is_dir(path); |
844 | let curdir = path == Path::new("." ); |
845 | match pattern_as_str(pattern) { |
846 | Some(s) => { |
847 | // This pattern component doesn't have any metacharacters, so we |
848 | // don't need to read the current directory to know where to |
849 | // continue. So instead of passing control back to the iterator, |
850 | // we can just check for that one entry and potentially recurse |
851 | // right away. |
852 | let special = "." == s || ".." == s; |
853 | let next_path = if curdir { |
854 | PathBuf::from(s) |
855 | } else { |
856 | path.join(&s) |
857 | }; |
858 | if (special && is_dir) || (!special && fs::metadata(&next_path).is_ok()) { |
859 | add(todo, next_path); |
860 | } |
861 | } |
862 | None if is_dir => { |
863 | let dirs = fs::read_dir(path).and_then(|d| { |
864 | d.map(|e| { |
865 | e.map(|e| { |
866 | if curdir { |
867 | PathBuf::from(e.path().file_name().unwrap()) |
868 | } else { |
869 | e.path() |
870 | } |
871 | }) |
872 | }) |
873 | .collect::<Result<Vec<_>, _>>() |
874 | }); |
875 | match dirs { |
876 | Ok(mut children) => { |
877 | children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name())); |
878 | todo.extend(children.into_iter().map(|x| Ok((x, idx)))); |
879 | |
880 | // Matching the special directory entries . and .. that |
881 | // refer to the current and parent directory respectively |
882 | // requires that the pattern has a leading dot, even if the |
883 | // `MatchOptions` field `require_literal_leading_dot` is not |
884 | // set. |
885 | if !pattern.tokens.is_empty() && pattern.tokens[0] == Char('.' ) { |
886 | for &special in &["." , ".." ] { |
887 | if pattern.matches_with(special, options) { |
888 | add(todo, path.join(special)); |
889 | } |
890 | } |
891 | } |
892 | } |
893 | Err(e) => { |
894 | todo.push(Err(GlobError { |
895 | path: path.to_path_buf(), |
896 | error: e, |
897 | })); |
898 | } |
899 | } |
900 | } |
901 | None => { |
902 | // not a directory, nothing more to find |
903 | } |
904 | } |
905 | } |
906 | |
907 | fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> { |
908 | let mut cs = Vec::new(); |
909 | let mut i = 0; |
910 | while i < s.len() { |
911 | if i + 3 <= s.len() && s[i + 1] == '-' { |
912 | cs.push(CharRange(s[i], s[i + 2])); |
913 | i += 3; |
914 | } else { |
915 | cs.push(SingleChar(s[i])); |
916 | i += 1; |
917 | } |
918 | } |
919 | cs |
920 | } |
921 | |
922 | fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool { |
923 | for &specifier in specifiers.iter() { |
924 | match specifier { |
925 | SingleChar(sc) => { |
926 | if chars_eq(c, sc, options.case_sensitive) { |
927 | return true; |
928 | } |
929 | } |
930 | CharRange(start, end) => { |
931 | // FIXME: work with non-ascii chars properly (issue #1347) |
932 | if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() { |
933 | let start = start.to_ascii_lowercase(); |
934 | let end = end.to_ascii_lowercase(); |
935 | |
936 | let start_up = start.to_uppercase().next().unwrap(); |
937 | let end_up = end.to_uppercase().next().unwrap(); |
938 | |
939 | // only allow case insensitive matching when |
940 | // both start and end are within a-z or A-Z |
941 | if start != start_up && end != end_up { |
942 | let c = c.to_ascii_lowercase(); |
943 | if c >= start && c <= end { |
944 | return true; |
945 | } |
946 | } |
947 | } |
948 | |
949 | if c >= start && c <= end { |
950 | return true; |
951 | } |
952 | } |
953 | } |
954 | } |
955 | |
956 | false |
957 | } |
958 | |
/// Compares two chars for equality, optionally ignoring ASCII case.
///
/// On Windows any two path separators are considered equal to each other.
fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
    if cfg!(windows) && path::is_separator(a) && path::is_separator(b) {
        return true;
    }

    // FIXME: work with non-ascii chars properly (issue #9084)
    let fold_ascii_case = !case_sensitive && a.is_ascii() && b.is_ascii();
    if fold_ascii_case {
        a.eq_ignore_ascii_case(&b)
    } else {
        a == b
    }
}
970 | |
/// Settings that adjust how `Pattern::matches_with(..)` compares a pattern
/// against a path.
#[allow(missing_copy_implementations)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct MatchOptions {
    /// When `true`, characters must match with the same case. When `false`,
    /// upper/lower case differences between ASCII characters are ignored;
    /// non-ASCII characters are currently always compared exactly, although
    /// this may be extended to Unicode in the future.
    pub case_sensitive: bool,

    /// When `true`, a path-component separator (e.g. `/` on Posix) in the
    /// path can only be matched by a literal `/` in the pattern, never by
    /// `*`, `?` or `[...]`.
    pub require_literal_separator: bool,

    /// When `true`, a path component that starts with `.` only matches if the
    /// pattern contains that `.` literally; `*`, `?`, `**` and `[...]` will
    /// not match it. Such files are conventionally treated as hidden on Unix
    /// systems, so this is handy for skipping them when listing files.
    pub require_literal_leading_dot: bool,
}

impl MatchOptions {
    /// Builds the `MatchOptions` used by functions that do not accept an
    /// explicit `MatchOptions` argument.
    ///
    /// The returned value is always:
    ///
    /// ```rust,ignore
    /// MatchOptions {
    ///     case_sensitive: true,
    ///     require_literal_separator: false,
    ///     require_literal_leading_dot: false
    /// }
    /// ```
    ///
    /// # Note
    /// This is not the same value as `default()`: here `case_sensitive` is
    /// `true`, whereas `default()` sets it to `false`.
    // FIXME: Consider unifying the behavior with `default()` in a next major release.
    pub fn new() -> Self {
        // Every derived default is `false`; only case sensitivity differs.
        MatchOptions {
            case_sensitive: true,
            ..Self::default()
        }
    }
}
1021 | |
1022 | #[cfg (test)] |
1023 | mod test { |
1024 | use super::{glob, MatchOptions, Pattern}; |
1025 | use std::path::Path; |
1026 | |
#[test]
fn test_pattern_from_str() {
    // A pattern obtained through `str::parse` can be used for matching.
    let parsed: Pattern = "a*b".parse().unwrap();
    assert!(parsed.matches("a_b"));

    // A parse failure carries the position of the offending wildcard.
    let failure = "a/**b".parse::<Pattern>().unwrap_err();
    assert_eq!(failure.pos, 4);
}
1032 | |
#[test]
fn test_wildcard_errors() {
    // (invalid pattern, position reported by the resulting error)
    let cases = [
        ("a/**b", 4),
        ("a/bc**", 3),
        ("a/*****", 4),
        ("a/b**c**d", 2),
        ("a**b", 0),
    ];
    for &(pattern, expected_pos) in cases.iter() {
        let err = Pattern::new(pattern).unwrap_err();
        assert!(err.pos == expected_pos);
    }
}
1041 | |
#[test]
fn test_unclosed_bracket_errors() {
    // Each of these patterns is rejected, with the error located at the `[`
    // (index 3).
    let patterns = [
        "abc[def", "abc[!def", "abc[", "abc[!", "abc[d", "abc[!d", "abc[]", "abc[!]",
    ];
    for &pattern in patterns.iter() {
        let err = Pattern::new(pattern).unwrap_err();
        assert!(err.pos == 3);
    }
}
1053 | |
#[test]
fn test_glob_errors() {
    // `glob` reports invalid patterns with the same positions as `Pattern::new`.
    let wildcard_err = glob("a/**b").err().unwrap();
    assert_eq!(wildcard_err.pos, 4);

    let bracket_err = glob("abc[def").err().unwrap();
    assert_eq!(bracket_err.pos, 3);
}
1059 | |
// This test relies on two assumptions about the environment: a `/root`
// directory exists, and the user running the test is not root (or otherwise
// lacks permission to read that directory's contents).
#[cfg(all(unix, not(target_os = "macos")))]
#[test]
fn test_iteration_errors() {
    use std::io;

    let mut entries = glob("/root/*").unwrap();

    // GlobErrors shouldn't halt iteration
    let first = entries.next();
    assert!(first.is_some());

    let outcome = first.unwrap();
    assert!(outcome.is_err());

    // The error names the unreadable directory and the underlying I/O cause.
    let glob_err = outcome.err().unwrap();
    assert!(glob_err.path() == Path::new("/root"));
    assert!(glob_err.error().kind() == io::ErrorKind::PermissionDenied);
}
1080 | |
#[test]
fn test_absolute_pattern() {
    assert!(glob("/").unwrap().next().is_some());
    assert!(glob("//").unwrap().next().is_some());

    // assume that the filesystem is not empty!
    assert!(glob("/*").unwrap().next().is_some());

    #[cfg(not(windows))]
    fn win() {}

    #[cfg(windows)]
    fn win() {
        use std::env::current_dir;
        use std::path::Component;

        // check windows absolute paths with host/device components
        let root_with_device = current_dir()
            .ok()
            .map(|p| match p.components().next().unwrap() {
                Component::Prefix(prefix_component) => {
                    // `Path::join` returns a new `PathBuf` instead of
                    // modifying the receiver, so the joined value must be
                    // kept; otherwise the `*` component is silently dropped
                    // and only the bare prefix is globbed.
                    Path::new(prefix_component.as_os_str()).join("*")
                }
                _ => panic!("no prefix in this path"),
            })
            .unwrap();
        // FIXME (#9639): This needs to handle non-utf8 paths
        assert!(glob(root_with_device.as_os_str().to_str().unwrap())
            .unwrap()
            .next()
            .is_some());
    }
    win()
}
1119 | |
#[test]
fn test_wildcards() {
    // (pattern, candidate, whether the pattern should match the candidate)
    let cases = [
        ("a*b", "a_b", true),
        ("a*b*c", "abc", true),
        ("a*b*c", "abcd", false),
        ("a*b*c", "a_b_c", true),
        ("a*b*c", "a___b___c", true),
        ("abc*abc*abc", "abcabcabcabcabcabcabc", true),
        ("abc*abc*abc", "abcabcabcabcabcabcabca", false),
        ("a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true),
        ("a*b[xyz]c*d", "abxcdbxcddd", true),
    ];
    for &(pattern, candidate, expected) in cases.iter() {
        let matched = Pattern::new(pattern).unwrap().matches(candidate);
        assert!(matched == expected);
    }
}
1138 | |
#[test]
fn test_recursive_wildcards() {
    // Asserts that `pat` accepts every path in `hits` and rejects every path
    // in `misses`.
    fn check(pat: &Pattern, hits: &[&str], misses: &[&str]) {
        for &path in hits {
            assert!(pat.matches(path));
        }
        for &path in misses {
            assert!(!pat.matches(path));
        }
    }

    let between_dirs = [
        "some/needle.txt",
        "some/one/needle.txt",
        "some/one/two/needle.txt",
        "some/other/needle.txt",
    ];

    let pat = Pattern::new("some/**/needle.txt").unwrap();
    check(&pat, &between_dirs, &["some/other/notthis.txt"]);

    // a single ** should be valid, for globs
    // Should accept anything
    let pat = Pattern::new("**").unwrap();
    assert!(pat.is_recursive);
    check(&pat, &["abcde", "", ".asdf", "/x/.asdf"], &[]);

    // collapse consecutive wildcards
    let pat = Pattern::new("some/**/**/needle.txt").unwrap();
    check(&pat, &between_dirs, &["some/other/notthis.txt"]);

    // ** can begin the pattern
    let pat = Pattern::new("**/test").unwrap();
    check(&pat, &["one/two/test", "one/test", "test"], &[]);

    // /** can begin the pattern
    let pat = Pattern::new("/**/test").unwrap();
    check(
        &pat,
        &["/one/two/test", "/one/test", "/test"],
        &["/one/notthis", "/notthis"],
    );

    // Only start sub-patterns on start of path segment.
    let pat = Pattern::new("**/.*").unwrap();
    check(&pat, &[".abc", "abc/.abc"], &["ab.c", "abc/ab.c"]);
}
1186 | |
#[test]
fn test_lots_of_files() {
    // this is a good test because it touches lots of differently named files;
    // only the traversal matters, so the entry that is reached is discarded
    let _ = glob("/*/*/*/*").unwrap().nth(10000);
}
1192 | |
#[test]
fn test_range_pattern() {
    // A digit range accepts every digit and rejects other characters.
    let digits = Pattern::new("a[0-9]b").unwrap();
    assert!((0..10).all(|d| digits.matches(&format!("a{}b", d))));
    assert!(!digits.matches("a_b"));

    // The negated range behaves the other way around.
    let non_digits = Pattern::new("a[!0-9]b").unwrap();
    assert!((0..10).all(|d| !non_digits.matches(&format!("a{}b", d))));
    assert!(non_digits.matches("a_b"));

    // A range may appear before, between or after single characters.
    let ignore_case = MatchOptions {
        case_sensitive: false,
        ..MatchOptions::new()
    };
    for &source in ["[a-z123]", "[1a-z23]", "[123a-z]"].iter() {
        let pat = Pattern::new(source).unwrap();
        for lower in "abcdefghijklmnopqrstuvwxyz".chars() {
            assert!(pat.matches(&lower.to_string()));
        }
        for upper in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() {
            assert!(pat.matches_with(&upper.to_string(), ignore_case));
        }
        for &digit in ["1", "2", "3"].iter() {
            assert!(pat.matches(digit));
        }
    }

    // A `-` that cannot form a range is matched literally.
    for &source in ["[abc-]", "[-abc]", "[a-c-]"].iter() {
        let pat = Pattern::new(source).unwrap();
        for &accepted in ["a", "b", "c", "-"].iter() {
            assert!(pat.matches(accepted));
        }
        assert!(!pat.matches("d"));
    }

    // A range whose start is greater than its end matches nothing.
    let reversed = Pattern::new("[2-1]").unwrap();
    assert!(!reversed.matches("1"));
    assert!(!reversed.matches("2"));

    let lone_dash = Pattern::new("[-]").unwrap();
    assert!(lone_dash.matches("-"));
    let negated_dash = Pattern::new("[!-]").unwrap();
    assert!(!negated_dash.matches("-"));
}
1242 | |
#[test]
fn test_pattern_matches() {
    // A leading `*` may absorb any prefix, including path separators.
    let txt_pat = Pattern::new("*hello.txt").unwrap();
    let txt_hits = [
        "hello.txt",
        "gareth_says_hello.txt",
        "some/path/to/hello.txt",
        "some \\path \\to \\hello.txt",
        "/an/absolute/path/to/hello.txt",
    ];
    for &path in txt_hits.iter() {
        assert!(txt_pat.matches(path));
    }
    for &path in ["hello.txt-and-then-some", "goodbye.txt"].iter() {
        assert!(!txt_pat.matches(path));
    }

    // The literal part after the `*` must match in full.
    let dir_pat = Pattern::new("*some/path/to/hello.txt").unwrap();
    for &path in ["some/path/to/hello.txt", "a/bigger/some/path/to/hello.txt"].iter() {
        assert!(dir_pat.matches(path));
    }
    let dir_misses = [
        "some/path/to/hello.txt-and-then-some",
        "some/other/path/to/hello.txt",
    ];
    for &path in dir_misses.iter() {
        assert!(!dir_pat.matches(path));
    }
}
1260 | |
#[test]
fn test_pattern_escape() {
    let raw = "_[_]_?_*_!_";
    let escaped = Pattern::escape(raw);

    // Each of `[`, `]`, `?` and `*` is wrapped in brackets; `!` is left alone.
    assert_eq!(escaped, "_[[]_[]]_[?]_[*]_!_".to_string());

    // The escaped text, used as a pattern, matches the raw text.
    let pat = Pattern::new(&escaped).unwrap();
    assert!(pat.matches(raw));
}
1267 | |
#[test]
fn test_pattern_matches_case_insensitive() {
    let pat = Pattern::new("aBcDeFg").unwrap();
    let ignore_case = MatchOptions {
        case_sensitive: false,
        ..MatchOptions::new()
    };

    // Any mix of upper and lower case is accepted.
    for &candidate in ["aBcDeFg", "abcdefg", "ABCDEFG", "AbCdEfG"].iter() {
        assert!(pat.matches_with(candidate, ignore_case));
    }
}
1282 | |
#[test]
fn test_pattern_matches_case_insensitive_range() {
    let only_a = Pattern::new("[a]").unwrap();
    let not_a = Pattern::new("[!a]").unwrap();

    let insensitive = MatchOptions {
        case_sensitive: false,
        ..MatchOptions::new()
    };
    let sensitive = MatchOptions::new();

    // `[a]` accepts `A` only when case is ignored.
    assert!(only_a.matches_with("a", insensitive));
    assert!(only_a.matches_with("A", insensitive));
    assert!(!only_a.matches_with("A", sensitive));

    // `[!a]` rejects `A` when case is ignored and accepts it otherwise.
    assert!(!not_a.matches_with("a", insensitive));
    assert!(!not_a.matches_with("A", insensitive));
    assert!(not_a.matches_with("A", sensitive));
}
1307 | |
#[test]
fn test_pattern_matches_require_literal_separator() {
    let strict = MatchOptions {
        require_literal_separator: true,
        ..MatchOptions::new()
    };
    let lenient = MatchOptions::new();

    // Matches `pattern` against the fixed path `abc/def`.
    let matches_abc_def = |pattern: &str, options: MatchOptions| {
        Pattern::new(pattern)
            .unwrap()
            .matches_with("abc/def", options)
    };

    // With the option set, only a literal `/` may match the separator.
    assert!(matches_abc_def("abc/def", strict));
    for &pattern in ["abc?def", "abc*def", "abc[/]def"].iter() {
        assert!(!matches_abc_def(pattern, strict));
    }

    // Without it, `?`, `*` and `[/]` match the separator as well.
    for &pattern in ["abc/def", "abc?def", "abc*def", "abc[/]def"].iter() {
        assert!(matches_abc_def(pattern, lenient));
    }
}
1347 | |
#[test]
fn test_pattern_matches_require_literal_leading_dot() {
    let dot_required = MatchOptions {
        require_literal_leading_dot: true,
        ..MatchOptions::new()
    };
    let dot_optional = MatchOptions::new();

    // (pattern, path, whether they match when a literal leading dot is
    // required). Every pair matches when the option is off.
    let cases = [
        ("*.txt", ".hello.txt", false),
        (".*.*", ".hello.txt", true),
        ("aaa/bbb/*", "aaa/bbb/.ccc", false),
        ("aaa/bbb/*", "aaa/bbb/c.c.c.", true),
        ("aaa/bbb/.*", "aaa/bbb/.ccc", true),
        ("aaa/?bbb", "aaa/.bbb", false),
        ("aaa/[.]bbb", "aaa/.bbb", false),
        ("**/*", ".bbb", false),
    ];

    for &(pattern, path, matches_when_required) in cases.iter() {
        let pat = Pattern::new(pattern).unwrap();
        assert!(pat.matches_with(path, dot_optional));
        assert!(pat.matches_with(path, dot_required) == matches_when_required);
    }
}
1421 | |
#[test]
fn test_matches_path() {
    // Per the note originally left here, on windows the path `a/b` is
    // rendered as `a\b`, so this checks that / and \ are considered
    // equivalent on windows.
    let pat = Pattern::new("a/b").unwrap();
    let path = Path::new("a/b");
    assert!(pat.matches_path(path));
}
1428 | |
#[test]
fn test_path_join() {
    // A pattern assembled with `Path::join` must still parse.
    let joined = Path::new("one").join("**/*.rs");
    let text = joined.to_str().unwrap();
    assert!(Pattern::new(text).is_ok());
}
1434 | } |
1435 | |