1 | // Copyright 2014 The Rust Project Developers. See the COPYRIGHT |
---|---|
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | // option. This file may not be copied, modified, or distributed |
9 | // except according to those terms. |
10 | |
11 | //! Support for matching file paths against Unix shell style patterns. |
12 | //! |
13 | //! The `glob` and `glob_with` functions allow querying the filesystem for all |
14 | //! files that match a particular pattern (similar to the libc `glob` function). |
15 | //! The methods on the `Pattern` type provide functionality for checking if |
16 | //! individual paths match a particular pattern (similar to the libc `fnmatch` |
17 | //! function). |
18 | //! |
19 | //! For consistency across platforms, and for Windows support, this module |
20 | //! is implemented entirely in Rust rather than deferring to the libc |
21 | //! `glob`/`fnmatch` functions. |
22 | //! |
23 | //! # Examples |
24 | //! |
25 | //! To print all jpg files in `/media/` and all of its subdirectories. |
26 | //! |
27 | //! ```rust,no_run |
28 | //! use glob::glob; |
29 | //! |
30 | //! for entry in glob("/media/**/*.jpg").expect("Failed to read glob pattern") { |
31 | //! match entry { |
32 | //! Ok(path) => println!("{:?}", path.display()), |
33 | //! Err(e) => println!("{:?}", e), |
34 | //! } |
35 | //! } |
36 | //! ``` |
37 | //! |
38 | //! To print all files containing the letter "a", case insensitive, in a `local` |
39 | //! directory relative to the current working directory. This ignores errors |
40 | //! instead of printing them. |
41 | //! |
42 | //! ```rust,no_run |
43 | //! use glob::glob_with; |
44 | //! use glob::MatchOptions; |
45 | //! |
46 | //! let options = MatchOptions { |
47 | //! case_sensitive: false, |
48 | //! require_literal_separator: false, |
49 | //! require_literal_leading_dot: false, |
50 | //! }; |
51 | //! for entry in glob_with("local/*a*", options).unwrap() { |
52 | //! if let Ok(path) = entry { |
53 | //! println!("{:?}", path.display()) |
54 | //! } |
55 | //! } |
56 | //! ``` |
57 | |
58 | #![doc( |
59 | html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png", |
60 | html_favicon_url = "https://www.rust-lang.org/favicon.ico", |
61 | html_root_url = "https://docs.rs/glob/0.3.1" |
62 | )] |
63 | #![deny(missing_docs)] |
64 | |
65 | #[cfg(test)] |
66 | #[macro_use] |
67 | extern crate doc_comment; |
68 | |
69 | #[cfg(test)] |
70 | doctest!("../README.md"); |
71 | |
72 | use std::cmp; |
73 | use std::error::Error; |
74 | use std::fmt; |
75 | use std::fs; |
76 | use std::fs::DirEntry; |
77 | use std::io; |
78 | use std::ops::Deref; |
79 | use std::path::{self, Component, Path, PathBuf}; |
80 | use std::str::FromStr; |
81 | |
82 | use CharSpecifier::{CharRange, SingleChar}; |
83 | use MatchResult::{EntirePatternDoesntMatch, Match, SubPatternDoesntMatch}; |
84 | use PatternToken::AnyExcept; |
85 | use PatternToken::{AnyChar, AnyRecursiveSequence, AnySequence, AnyWithin, Char}; |
86 | |
87 | /// An iterator that yields `Path`s from the filesystem that match a particular |
88 | /// pattern. |
89 | /// |
90 | /// Note that it yields `GlobResult` in order to report any `IoErrors` that may |
91 | /// arise during iteration. If a directory matches but is unreadable, |
92 | /// thereby preventing its contents from being checked for matches, a |
93 | /// `GlobError` is returned to express this. |
94 | /// |
95 | /// See the `glob` function for more details. |
96 | #[derive(Debug)] |
97 | pub struct Paths { |
98 | dir_patterns: Vec<Pattern>, |
99 | require_dir: bool, |
100 | options: MatchOptions, |
101 | todo: Vec<Result<(PathWrapper, usize), GlobError>>, |
102 | scope: Option<PathWrapper>, |
103 | } |
104 | |
105 | /// Return an iterator that produces all the `Path`s that match the given |
106 | /// pattern using default match options, which may be absolute or relative to |
107 | /// the current working directory. |
108 | /// |
109 | /// This may return an error if the pattern is invalid. |
110 | /// |
111 | /// This method uses the default match options and is equivalent to calling |
112 | /// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you |
113 | /// want to use non-default match options. |
114 | /// |
115 | /// When iterating, each result is a `GlobResult` which expresses the |
116 | /// possibility that there was an `IoError` when attempting to read the contents |
117 | /// of the matched path. In other words, each item returned by the iterator |
118 | /// will either be an `Ok(Path)` if the path matched, or an `Err(GlobError)` if |
119 | /// the path (partially) matched _but_ its contents could not be read in order |
120 | /// to determine if its contents matched. |
121 | /// |
122 | /// See the `Paths` documentation for more information. |
123 | /// |
124 | /// # Examples |
125 | /// |
126 | /// Consider a directory `/media/pictures` containing only the files |
127 | /// `kittens.jpg`, `puppies.jpg` and `hamsters.gif`: |
128 | /// |
129 | /// ```rust,no_run |
130 | /// use glob::glob; |
131 | /// |
132 | /// for entry in glob("/media/pictures/*.jpg").unwrap() { |
133 | /// match entry { |
134 | /// Ok(path) => println!("{:?}", path.display()), |
135 | /// |
136 | /// // if the path matched but was unreadable, |
137 | /// // thereby preventing its contents from matching |
138 | /// Err(e) => println!("{:?}", e), |
139 | /// } |
140 | /// } |
141 | /// ``` |
142 | /// |
143 | /// The above code will print: |
144 | /// |
145 | /// ```ignore |
146 | /// /media/pictures/kittens.jpg |
147 | /// /media/pictures/puppies.jpg |
148 | /// ``` |
149 | /// |
150 | /// If you want to ignore unreadable paths, you can use something like |
151 | /// `filter_map`: |
152 | /// |
153 | /// ```rust |
154 | /// use glob::glob; |
155 | /// use std::result::Result; |
156 | /// |
157 | /// for path in glob("/media/pictures/*.jpg").unwrap().filter_map(Result::ok) { |
158 | /// println!("{}", path.display()); |
159 | /// } |
160 | /// ``` |
161 | /// Paths are yielded in alphabetical order. |
162 | pub fn glob(pattern: &str) -> Result<Paths, PatternError> { |
163 | glob_with(pattern, options:MatchOptions::new()) |
164 | } |
165 | |
166 | /// Return an iterator that produces all the `Path`s that match the given |
167 | /// pattern using the specified match options, which may be absolute or relative |
168 | /// to the current working directory. |
169 | /// |
170 | /// This may return an error if the pattern is invalid. |
171 | /// |
172 | /// This function accepts Unix shell style patterns as described by |
173 | /// `Pattern::new(..)`. The options given are passed through unchanged to |
174 | /// `Pattern::matches_with(..)` with the exception that |
175 | /// `require_literal_separator` is always set to `true` regardless of the value |
176 | /// passed to this function. |
177 | /// |
178 | /// Paths are yielded in alphabetical order. |
179 | pub fn glob_with(pattern: &str, options: MatchOptions) -> Result<Paths, PatternError> { |
180 | #[cfg(windows)] |
181 | fn check_windows_verbatim(p: &Path) -> bool { |
182 | match p.components().next() { |
183 | Some(Component::Prefix(ref p)) => { |
184 | // Allow VerbatimDisk paths. std canonicalize() generates them, and they work fine |
185 | p.kind().is_verbatim() |
186 | && if let std::path::Prefix::VerbatimDisk(_) = p.kind() { |
187 | false |
188 | } else { |
189 | true |
190 | } |
191 | } |
192 | _ => false, |
193 | } |
194 | } |
195 | #[cfg(not(windows))] |
196 | fn check_windows_verbatim(_: &Path) -> bool { |
197 | false |
198 | } |
199 | |
200 | #[cfg(windows)] |
201 | fn to_scope(p: &Path) -> PathBuf { |
202 | // FIXME handle volume relative paths here |
203 | p.to_path_buf() |
204 | } |
205 | #[cfg(not(windows))] |
206 | fn to_scope(p: &Path) -> PathBuf { |
207 | p.to_path_buf() |
208 | } |
209 | |
210 | // make sure that the pattern is valid first, else early return with error |
211 | if let Err(err) = Pattern::new(pattern) { |
212 | return Err(err); |
213 | } |
214 | |
215 | let mut components = Path::new(pattern).components().peekable(); |
216 | loop { |
217 | match components.peek() { |
218 | Some(&Component::Prefix(..)) | Some(&Component::RootDir) => { |
219 | components.next(); |
220 | } |
221 | _ => break, |
222 | } |
223 | } |
224 | let rest = components.map(|s| s.as_os_str()).collect::<PathBuf>(); |
225 | let normalized_pattern = Path::new(pattern).iter().collect::<PathBuf>(); |
226 | let root_len = normalized_pattern.to_str().unwrap().len() - rest.to_str().unwrap().len(); |
227 | let root = if root_len > 0 { |
228 | Some(Path::new(&pattern[..root_len])) |
229 | } else { |
230 | None |
231 | }; |
232 | |
233 | if root_len > 0 && check_windows_verbatim(root.unwrap()) { |
234 | // FIXME: How do we want to handle verbatim paths? I'm inclined to |
235 | // return nothing, since we can't very well find all UNC shares with a |
236 | // 1-letter server name. |
237 | return Ok(Paths { |
238 | dir_patterns: Vec::new(), |
239 | require_dir: false, |
240 | options, |
241 | todo: Vec::new(), |
242 | scope: None, |
243 | }); |
244 | } |
245 | |
246 | let scope = root.map_or_else(|| PathBuf::from("."), to_scope); |
247 | let scope = PathWrapper::from_path(scope); |
248 | |
249 | let mut dir_patterns = Vec::new(); |
250 | let components = |
251 | pattern[cmp::min(root_len, pattern.len())..].split_terminator(path::is_separator); |
252 | |
253 | for component in components { |
254 | dir_patterns.push(Pattern::new(component)?); |
255 | } |
256 | |
257 | if root_len == pattern.len() { |
258 | dir_patterns.push(Pattern { |
259 | original: "".to_string(), |
260 | tokens: Vec::new(), |
261 | is_recursive: false, |
262 | }); |
263 | } |
264 | |
265 | let last_is_separator = pattern.chars().next_back().map(path::is_separator); |
266 | let require_dir = last_is_separator == Some(true); |
267 | let todo = Vec::new(); |
268 | |
269 | Ok(Paths { |
270 | dir_patterns, |
271 | require_dir, |
272 | options, |
273 | todo, |
274 | scope: Some(scope), |
275 | }) |
276 | } |
277 | |
/// An error produced while iterating over glob matches.
///
/// It is typically returned when a path cannot be read to find out whether
/// its contents match the glob pattern, for example because the program
/// lacks the required permissions.
#[derive(Debug)]
pub struct GlobError {
    /// The path that could not be read.
    path: PathBuf,
    /// The I/O error reported for that path.
    error: io::Error,
}
288 | |
289 | impl GlobError { |
290 | /// The Path that the error corresponds to. |
291 | pub fn path(&self) -> &Path { |
292 | &self.path |
293 | } |
294 | |
295 | /// The error in question. |
296 | pub fn error(&self) -> &io::Error { |
297 | &self.error |
298 | } |
299 | |
300 | /// Consumes self, returning the _raw_ underlying `io::Error` |
301 | pub fn into_error(self) -> io::Error { |
302 | self.error |
303 | } |
304 | } |
305 | |
306 | impl Error for GlobError { |
307 | #[allow(deprecated)] |
308 | fn description(&self) -> &str { |
309 | self.error.description() |
310 | } |
311 | |
312 | #[allow(unknown_lints, bare_trait_objects)] |
313 | fn cause(&self) -> Option<&dynError> { |
314 | Some(&self.error) |
315 | } |
316 | } |
317 | |
318 | impl fmt::Display for GlobError { |
319 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
320 | write!( |
321 | f, |
322 | "attempting to read `{} ` resulted in an error:{} ", |
323 | self.path.display(), |
324 | self.error |
325 | ) |
326 | } |
327 | } |
328 | |
/// A path together with a cached answer to "is this a directory?", so the
/// filesystem is queried once when the wrapper is built rather than on every
/// use.
#[derive(Debug)]
struct PathWrapper {
    /// The wrapped path.
    path: PathBuf,
    /// Whether `path` was found to be a directory when the wrapper was built.
    is_directory: bool,
}
334 | |
335 | impl PathWrapper { |
336 | fn from_dir_entry(path: PathBuf, e: DirEntry) -> Self { |
337 | let is_directory = e |
338 | .file_type() |
339 | .ok() |
340 | .and_then(|file_type| { |
341 | // We need to use fs::metadata to resolve the actual path |
342 | // if it's a symlink. |
343 | if file_type.is_symlink() { |
344 | None |
345 | } else { |
346 | Some(file_type.is_dir()) |
347 | } |
348 | }) |
349 | .or_else(|| fs::metadata(&path).map(|m| m.is_dir()).ok()) |
350 | .unwrap_or(false); |
351 | Self { path, is_directory } |
352 | } |
353 | fn from_path(path: PathBuf) -> Self { |
354 | let is_directory = fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false); |
355 | Self { path, is_directory } |
356 | } |
357 | |
358 | fn into_path(self) -> PathBuf { |
359 | self.path |
360 | } |
361 | } |
362 | |
363 | impl Deref for PathWrapper { |
364 | type Target = Path; |
365 | |
366 | fn deref(&self) -> &Self::Target { |
367 | self.path.deref() |
368 | } |
369 | } |
370 | |
371 | impl AsRef<Path> for PathWrapper { |
372 | fn as_ref(&self) -> &Path { |
373 | self.path.as_ref() |
374 | } |
375 | } |
376 | |
377 | /// An alias for a glob iteration result. |
378 | /// |
379 | /// This represents either a matched path or a glob iteration error, |
380 | /// such as failing to read a particular directory's contents. |
381 | pub type GlobResult = Result<PathBuf, GlobError>; |
382 | |
383 | impl Iterator for Paths { |
384 | type Item = GlobResult; |
385 | |
386 | fn next(&mut self) -> Option<GlobResult> { |
387 | // the todo buffer hasn't been initialized yet, so it's done at this |
388 | // point rather than in glob() so that the errors are unified that is, |
389 | // failing to fill the buffer is an iteration error construction of the |
390 | // iterator (i.e. glob()) only fails if it fails to compile the Pattern |
391 | if let Some(scope) = self.scope.take() { |
392 | if !self.dir_patterns.is_empty() { |
393 | // Shouldn't happen, but we're using -1 as a special index. |
394 | assert!(self.dir_patterns.len() < !0 as usize); |
395 | |
396 | fill_todo(&mut self.todo, &self.dir_patterns, 0, &scope, self.options); |
397 | } |
398 | } |
399 | |
400 | loop { |
401 | if self.dir_patterns.is_empty() || self.todo.is_empty() { |
402 | return None; |
403 | } |
404 | |
405 | let (path, mut idx) = match self.todo.pop().unwrap() { |
406 | Ok(pair) => pair, |
407 | Err(e) => return Some(Err(e)), |
408 | }; |
409 | |
410 | // idx -1: was already checked by fill_todo, maybe path was '.' or |
411 | // '..' that we can't match here because of normalization. |
412 | if idx == !0 as usize { |
413 | if self.require_dir && !path.is_directory { |
414 | continue; |
415 | } |
416 | return Some(Ok(path.into_path())); |
417 | } |
418 | |
419 | if self.dir_patterns[idx].is_recursive { |
420 | let mut next = idx; |
421 | |
422 | // collapse consecutive recursive patterns |
423 | while (next + 1) < self.dir_patterns.len() |
424 | && self.dir_patterns[next + 1].is_recursive |
425 | { |
426 | next += 1; |
427 | } |
428 | |
429 | if path.is_directory { |
430 | // the path is a directory, so it's a match |
431 | |
432 | // push this directory's contents |
433 | fill_todo( |
434 | &mut self.todo, |
435 | &self.dir_patterns, |
436 | next, |
437 | &path, |
438 | self.options, |
439 | ); |
440 | |
441 | if next == self.dir_patterns.len() - 1 { |
442 | // pattern ends in recursive pattern, so return this |
443 | // directory as a result |
444 | return Some(Ok(path.into_path())); |
445 | } else { |
446 | // advanced to the next pattern for this path |
447 | idx = next + 1; |
448 | } |
449 | } else if next == self.dir_patterns.len() - 1 { |
450 | // not a directory and it's the last pattern, meaning no |
451 | // match |
452 | continue; |
453 | } else { |
454 | // advanced to the next pattern for this path |
455 | idx = next + 1; |
456 | } |
457 | } |
458 | |
459 | // not recursive, so match normally |
460 | if self.dir_patterns[idx].matches_with( |
461 | { |
462 | match path.file_name().and_then(|s| s.to_str()) { |
463 | // FIXME (#9639): How do we handle non-utf8 filenames? |
464 | // Ignore them for now; ideally we'd still match them |
465 | // against a * |
466 | None => continue, |
467 | Some(x) => x, |
468 | } |
469 | }, |
470 | self.options, |
471 | ) { |
472 | if idx == self.dir_patterns.len() - 1 { |
473 | // it is not possible for a pattern to match a directory |
474 | // *AND* its children so we don't need to check the |
475 | // children |
476 | |
477 | if !self.require_dir || path.is_directory { |
478 | return Some(Ok(path.into_path())); |
479 | } |
480 | } else { |
481 | fill_todo( |
482 | &mut self.todo, |
483 | &self.dir_patterns, |
484 | idx + 1, |
485 | &path, |
486 | self.options, |
487 | ); |
488 | } |
489 | } |
490 | } |
491 | } |
492 | } |
493 | |
/// An error raised when a glob pattern cannot be compiled.
#[derive(Debug)]
#[allow(missing_copy_implementations)]
pub struct PatternError {
    /// Approximate index, in characters, of the place where the problem was
    /// detected.
    pub pos: usize,

    /// Human-readable explanation of what is wrong with the pattern.
    pub msg: &'static str,
}
504 | |
505 | impl Error for PatternError { |
506 | fn description(&self) -> &str { |
507 | self.msg |
508 | } |
509 | } |
510 | |
511 | impl fmt::Display for PatternError { |
512 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
513 | write!( |
514 | f, |
515 | "Pattern syntax error near position{} :{} ", |
516 | self.pos, self.msg |
517 | ) |
518 | } |
519 | } |
520 | |
521 | /// A compiled Unix shell style pattern. |
522 | /// |
523 | /// - `?` matches any single character. |
524 | /// |
525 | /// - `*` matches any (possibly empty) sequence of characters. |
526 | /// |
527 | /// - `**` matches the current directory and arbitrary |
528 | /// subdirectories. To match files in arbitrary subdiretories, use |
529 | /// `**/*`. |
530 | /// |
531 | /// This sequence **must** form a single path component, so both |
532 | /// `**a` and `b**` are invalid and will result in an error. A |
533 | /// sequence of more than two consecutive `*` characters is also |
534 | /// invalid. |
535 | /// |
536 | /// - `[...]` matches any character inside the brackets. Character sequences |
537 | /// can also specify ranges of characters, as ordered by Unicode, so e.g. |
538 | /// `[0-9]` specifies any character between 0 and 9 inclusive. An unclosed |
539 | /// bracket is invalid. |
540 | /// |
541 | /// - `[!...]` is the negation of `[...]`, i.e. it matches any characters |
542 | /// **not** in the brackets. |
543 | /// |
544 | /// - The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets |
545 | /// (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then it |
546 | /// is interpreted as being part of, rather then ending, the character set, so |
547 | /// `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively. The `-` |
548 | /// character can be specified inside a character sequence pattern by placing |
549 | /// it at the start or the end, e.g. `[abc-]`. |
550 | #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] |
551 | pub struct Pattern { |
552 | original: String, |
553 | tokens: Vec<PatternToken>, |
554 | is_recursive: bool, |
555 | } |
556 | |
557 | /// Show the original glob pattern. |
558 | impl fmt::Display for Pattern { |
559 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
560 | self.original.fmt(f) |
561 | } |
562 | } |
563 | |
564 | impl FromStr for Pattern { |
565 | type Err = PatternError; |
566 | |
567 | fn from_str(s: &str) -> Result<Self, PatternError> { |
568 | Self::new(pattern:s) |
569 | } |
570 | } |
571 | |
572 | #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] |
573 | enum PatternToken { |
574 | Char(char), |
575 | AnyChar, |
576 | AnySequence, |
577 | AnyRecursiveSequence, |
578 | AnyWithin(Vec<CharSpecifier>), |
579 | AnyExcept(Vec<CharSpecifier>), |
580 | } |
581 | |
/// One entry of a bracketed character set.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum CharSpecifier {
    /// Exactly this character.
    SingleChar(char),
    /// Every character from the first to the second, both inclusive.
    CharRange(char, char),
}
587 | |
/// Outcome of matching a suffix of the pattern against a suffix of the input.
#[derive(Copy, Clone, PartialEq)]
enum MatchResult {
    /// Pattern and input were both consumed completely.
    Match,
    /// This attempt failed; an enclosing wildcard keeps trying with a longer
    /// match.
    SubPatternDoesntMatch,
    /// The input ran out while tokens were still left; enclosing wildcards
    /// pass this result straight up instead of retrying.
    EntirePatternDoesntMatch,
}
594 | |
// Messages carried by the `PatternError`s that `Pattern::new` returns.
const ERROR_WILDCARDS: &str = "wildcards are either regular `*` or recursive `**`";
const ERROR_RECURSIVE_WILDCARDS: &str = "recursive wildcards must form a single path component";
const ERROR_INVALID_RANGE: &str = "invalid range pattern";
599 | |
600 | impl Pattern { |
601 | /// This function compiles Unix shell style patterns. |
602 | /// |
603 | /// An invalid glob pattern will yield a `PatternError`. |
604 | pub fn new(pattern: &str) -> Result<Self, PatternError> { |
605 | let chars = pattern.chars().collect::<Vec<_>>(); |
606 | let mut tokens = Vec::new(); |
607 | let mut is_recursive = false; |
608 | let mut i = 0; |
609 | |
610 | while i < chars.len() { |
611 | match chars[i] { |
612 | '?'=> { |
613 | tokens.push(AnyChar); |
614 | i += 1; |
615 | } |
616 | '*'=> { |
617 | let old = i; |
618 | |
619 | while i < chars.len() && chars[i] == '*'{ |
620 | i += 1; |
621 | } |
622 | |
623 | let count = i - old; |
624 | |
625 | if count > 2 { |
626 | return Err(PatternError { |
627 | pos: old + 2, |
628 | msg: ERROR_WILDCARDS, |
629 | }); |
630 | } else if count == 2 { |
631 | // ** can only be an entire path component |
632 | // i.e. a/**/b is valid, but a**/b or a/**b is not |
633 | // invalid matches are treated literally |
634 | let is_valid = if i == 2 || path::is_separator(chars[i - count - 1]) { |
635 | // it ends in a '/' |
636 | if i < chars.len() && path::is_separator(chars[i]) { |
637 | i += 1; |
638 | true |
639 | // or the pattern ends here |
640 | // this enables the existing globbing mechanism |
641 | } else if i == chars.len() { |
642 | true |
643 | // `**` ends in non-separator |
644 | } else { |
645 | return Err(PatternError { |
646 | pos: i, |
647 | msg: ERROR_RECURSIVE_WILDCARDS, |
648 | }); |
649 | } |
650 | // `**` begins with non-separator |
651 | } else { |
652 | return Err(PatternError { |
653 | pos: old - 1, |
654 | msg: ERROR_RECURSIVE_WILDCARDS, |
655 | }); |
656 | }; |
657 | |
658 | if is_valid { |
659 | // collapse consecutive AnyRecursiveSequence to a |
660 | // single one |
661 | |
662 | let tokens_len = tokens.len(); |
663 | |
664 | if !(tokens_len > 1 && tokens[tokens_len - 1] == AnyRecursiveSequence) { |
665 | is_recursive = true; |
666 | tokens.push(AnyRecursiveSequence); |
667 | } |
668 | } |
669 | } else { |
670 | tokens.push(AnySequence); |
671 | } |
672 | } |
673 | '['=> { |
674 | if i + 4 <= chars.len() && chars[i + 1] == '!'{ |
675 | match chars[i + 3..].iter().position(|x| *x == ']') { |
676 | None => (), |
677 | Some(j) => { |
678 | let chars = &chars[i + 2..i + 3 + j]; |
679 | let cs = parse_char_specifiers(chars); |
680 | tokens.push(AnyExcept(cs)); |
681 | i += j + 4; |
682 | continue; |
683 | } |
684 | } |
685 | } else if i + 3 <= chars.len() && chars[i + 1] != '!'{ |
686 | match chars[i + 2..].iter().position(|x| *x == ']') { |
687 | None => (), |
688 | Some(j) => { |
689 | let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]); |
690 | tokens.push(AnyWithin(cs)); |
691 | i += j + 3; |
692 | continue; |
693 | } |
694 | } |
695 | } |
696 | |
697 | // if we get here then this is not a valid range pattern |
698 | return Err(PatternError { |
699 | pos: i, |
700 | msg: ERROR_INVALID_RANGE, |
701 | }); |
702 | } |
703 | c => { |
704 | tokens.push(Char(c)); |
705 | i += 1; |
706 | } |
707 | } |
708 | } |
709 | |
710 | Ok(Self { |
711 | tokens, |
712 | original: pattern.to_string(), |
713 | is_recursive, |
714 | }) |
715 | } |
716 | |
717 | /// Escape metacharacters within the given string by surrounding them in |
718 | /// brackets. The resulting string will, when compiled into a `Pattern`, |
719 | /// match the input string and nothing else. |
720 | pub fn escape(s: &str) -> String { |
721 | let mut escaped = String::new(); |
722 | for c in s.chars() { |
723 | match c { |
724 | // note that ! does not need escaping because it is only special |
725 | // inside brackets |
726 | '?'| '*'| '['| ']'=> { |
727 | escaped.push('['); |
728 | escaped.push(c); |
729 | escaped.push(']'); |
730 | } |
731 | c => { |
732 | escaped.push(c); |
733 | } |
734 | } |
735 | } |
736 | escaped |
737 | } |
738 | |
739 | /// Return if the given `str` matches this `Pattern` using the default |
740 | /// match options (i.e. `MatchOptions::new()`). |
741 | /// |
742 | /// # Examples |
743 | /// |
744 | /// ```rust |
745 | /// use glob::Pattern; |
746 | /// |
747 | /// assert!(Pattern::new("c?t").unwrap().matches( "cat")); |
748 | /// assert!(Pattern::new("k[!e]tteh").unwrap().matches( "kitteh")); |
749 | /// assert!(Pattern::new("d*g").unwrap().matches( "doog")); |
750 | /// ``` |
751 | pub fn matches(&self, str: &str) -> bool { |
752 | self.matches_with(str, MatchOptions::new()) |
753 | } |
754 | |
755 | /// Return if the given `Path`, when converted to a `str`, matches this |
756 | /// `Pattern` using the default match options (i.e. `MatchOptions::new()`). |
757 | pub fn matches_path(&self, path: &Path) -> bool { |
758 | // FIXME (#9639): This needs to handle non-utf8 paths |
759 | path.to_str().map_or(false, |s| self.matches(s)) |
760 | } |
761 | |
762 | /// Return if the given `str` matches this `Pattern` using the specified |
763 | /// match options. |
764 | pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool { |
765 | self.matches_from(true, str.chars(), 0, options) == Match |
766 | } |
767 | |
768 | /// Return if the given `Path`, when converted to a `str`, matches this |
769 | /// `Pattern` using the specified match options. |
770 | pub fn matches_path_with(&self, path: &Path, options: MatchOptions) -> bool { |
771 | // FIXME (#9639): This needs to handle non-utf8 paths |
772 | path.to_str() |
773 | .map_or(false, |s| self.matches_with(s, options)) |
774 | } |
775 | |
776 | /// Access the original glob pattern. |
777 | pub fn as_str(&self) -> &str { |
778 | &self.original |
779 | } |
780 | |
781 | fn matches_from( |
782 | &self, |
783 | mut follows_separator: bool, |
784 | mut file: std::str::Chars, |
785 | i: usize, |
786 | options: MatchOptions, |
787 | ) -> MatchResult { |
788 | for (ti, token) in self.tokens[i..].iter().enumerate() { |
789 | match *token { |
790 | AnySequence | AnyRecursiveSequence => { |
791 | // ** must be at the start. |
792 | debug_assert!(match *token { |
793 | AnyRecursiveSequence => follows_separator, |
794 | _ => true, |
795 | }); |
796 | |
797 | // Empty match |
798 | match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) { |
799 | SubPatternDoesntMatch => (), // keep trying |
800 | m => return m, |
801 | }; |
802 | |
803 | while let Some(c) = file.next() { |
804 | if follows_separator && options.require_literal_leading_dot && c == '.'{ |
805 | return SubPatternDoesntMatch; |
806 | } |
807 | follows_separator = path::is_separator(c); |
808 | match *token { |
809 | AnyRecursiveSequence if !follows_separator => continue, |
810 | AnySequence |
811 | if options.require_literal_separator && follows_separator => |
812 | { |
813 | return SubPatternDoesntMatch |
814 | } |
815 | _ => (), |
816 | } |
817 | match self.matches_from( |
818 | follows_separator, |
819 | file.clone(), |
820 | i + ti + 1, |
821 | options, |
822 | ) { |
823 | SubPatternDoesntMatch => (), // keep trying |
824 | m => return m, |
825 | } |
826 | } |
827 | } |
828 | _ => { |
829 | let c = match file.next() { |
830 | Some(c) => c, |
831 | None => return EntirePatternDoesntMatch, |
832 | }; |
833 | |
834 | let is_sep = path::is_separator(c); |
835 | |
836 | if !match *token { |
837 | AnyChar | AnyWithin(..) | AnyExcept(..) |
838 | if (options.require_literal_separator && is_sep) |
839 | || (follows_separator |
840 | && options.require_literal_leading_dot |
841 | && c == '.') => |
842 | { |
843 | false |
844 | } |
845 | AnyChar => true, |
846 | AnyWithin(ref specifiers) => in_char_specifiers(&specifiers, c, options), |
847 | AnyExcept(ref specifiers) => !in_char_specifiers(&specifiers, c, options), |
848 | Char(c2) => chars_eq(c, c2, options.case_sensitive), |
849 | AnySequence | AnyRecursiveSequence => unreachable!(), |
850 | } { |
851 | return SubPatternDoesntMatch; |
852 | } |
853 | follows_separator = is_sep; |
854 | } |
855 | } |
856 | } |
857 | |
858 | // Iter is fused. |
859 | if file.next().is_none() { |
860 | Match |
861 | } else { |
862 | SubPatternDoesntMatch |
863 | } |
864 | } |
865 | } |
866 | |
867 | // Fills `todo` with paths under `path` to be matched by `patterns[idx]`, |
868 | // special-casing patterns to match `.` and `..`, and avoiding `readdir()` |
869 | // calls when there are no metacharacters in the pattern. |
870 | fn fill_todo( |
871 | todo: &mut Vec<Result<(PathWrapper, usize), GlobError>>, |
872 | patterns: &[Pattern], |
873 | idx: usize, |
874 | path: &PathWrapper, |
875 | options: MatchOptions, |
876 | ) { |
877 | // convert a pattern that's just many Char(_) to a string |
878 | fn pattern_as_str(pattern: &Pattern) -> Option<String> { |
879 | let mut s = String::new(); |
880 | for token in &pattern.tokens { |
881 | match *token { |
882 | Char(c) => s.push(c), |
883 | _ => return None, |
884 | } |
885 | } |
886 | |
887 | Some(s) |
888 | } |
889 | |
890 | let add = |todo: &mut Vec<_>, next_path: PathWrapper| { |
891 | if idx + 1 == patterns.len() { |
892 | // We know it's good, so don't make the iterator match this path |
893 | // against the pattern again. In particular, it can't match |
894 | // . or .. globs since these never show up as path components. |
895 | todo.push(Ok((next_path, !0 as usize))); |
896 | } else { |
897 | fill_todo(todo, patterns, idx + 1, &next_path, options); |
898 | } |
899 | }; |
900 | |
901 | let pattern = &patterns[idx]; |
902 | let is_dir = path.is_directory; |
903 | let curdir = path.as_ref() == Path::new("."); |
904 | match pattern_as_str(pattern) { |
905 | Some(s) => { |
906 | // This pattern component doesn't have any metacharacters, so we |
907 | // don't need to read the current directory to know where to |
908 | // continue. So instead of passing control back to the iterator, |
909 | // we can just check for that one entry and potentially recurse |
910 | // right away. |
911 | let special = "."== s || ".."== s; |
912 | let next_path = if curdir { |
913 | PathBuf::from(s) |
914 | } else { |
915 | path.join(&s) |
916 | }; |
917 | let next_path = PathWrapper::from_path(next_path); |
918 | if (special && is_dir) |
919 | || (!special |
920 | && (fs::metadata(&next_path).is_ok() |
921 | || fs::symlink_metadata(&next_path).is_ok())) |
922 | { |
923 | add(todo, next_path); |
924 | } |
925 | } |
926 | None if is_dir => { |
927 | let dirs = fs::read_dir(path).and_then(|d| { |
928 | d.map(|e| { |
929 | e.map(|e| { |
930 | let path = if curdir { |
931 | PathBuf::from(e.path().file_name().unwrap()) |
932 | } else { |
933 | e.path() |
934 | }; |
935 | PathWrapper::from_dir_entry(path, e) |
936 | }) |
937 | }) |
938 | .collect::<Result<Vec<_>, _>>() |
939 | }); |
940 | match dirs { |
941 | Ok(mut children) => { |
942 | if options.require_literal_leading_dot { |
943 | children |
944 | .retain(|x| !x.file_name().unwrap().to_str().unwrap().starts_with(".")); |
945 | } |
946 | children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name())); |
947 | todo.extend(children.into_iter().map(|x| Ok((x, idx)))); |
948 | |
949 | // Matching the special directory entries . and .. that |
950 | // refer to the current and parent directory respectively |
951 | // requires that the pattern has a leading dot, even if the |
952 | // `MatchOptions` field `require_literal_leading_dot` is not |
953 | // set. |
954 | if !pattern.tokens.is_empty() && pattern.tokens[0] == Char('.') { |
955 | for &special in &[".", ".."] { |
956 | if pattern.matches_with(special, options) { |
957 | add(todo, PathWrapper::from_path(path.join(special))); |
958 | } |
959 | } |
960 | } |
961 | } |
962 | Err(e) => { |
963 | todo.push(Err(GlobError { |
964 | path: path.to_path_buf(), |
965 | error: e, |
966 | })); |
967 | } |
968 | } |
969 | } |
970 | None => { |
971 | // not a directory, nothing more to find |
972 | } |
973 | } |
974 | } |
975 | |
976 | fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> { |
977 | let mut cs: Vec |
978 | let mut i: usize = 0; |
979 | while i < s.len() { |
980 | if i + 3 <= s.len() && s[i + 1] == '-'{ |
981 | cs.push(CharRange(s[i], s[i + 2])); |
982 | i += 3; |
983 | } else { |
984 | cs.push(SingleChar(s[i])); |
985 | i += 1; |
986 | } |
987 | } |
988 | cs |
989 | } |
990 | |
991 | fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool { |
992 | for &specifier in specifiers.iter() { |
993 | match specifier { |
994 | SingleChar(sc) => { |
995 | if chars_eq(c, sc, options.case_sensitive) { |
996 | return true; |
997 | } |
998 | } |
999 | CharRange(start, end) => { |
1000 | // FIXME: work with non-ascii chars properly (issue #1347) |
1001 | if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() { |
1002 | let start = start.to_ascii_lowercase(); |
1003 | let end = end.to_ascii_lowercase(); |
1004 | |
1005 | let start_up = start.to_uppercase().next().unwrap(); |
1006 | let end_up = end.to_uppercase().next().unwrap(); |
1007 | |
1008 | // only allow case insensitive matching when |
1009 | // both start and end are within a-z or A-Z |
1010 | if start != start_up && end != end_up { |
1011 | let c = c.to_ascii_lowercase(); |
1012 | if c >= start && c <= end { |
1013 | return true; |
1014 | } |
1015 | } |
1016 | } |
1017 | |
1018 | if c >= start && c <= end { |
1019 | return true; |
1020 | } |
1021 | } |
1022 | } |
1023 | } |
1024 | |
1025 | false |
1026 | } |
1027 | |
/// Compares two characters for equality, optionally ignoring ASCII case.
///
/// On Windows builds, two path separator characters always compare equal.
/// Otherwise, when `case_sensitive` is `false` and both characters are ASCII,
/// they are compared after ASCII lower-casing; in every remaining situation a
/// plain `==` is used.
fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
    if cfg!(windows) && path::is_separator(a) && path::is_separator(b) {
        return true;
    }

    // FIXME: work with non-ascii chars properly (issue #9084)
    let fold_ascii_case = !case_sensitive && a.is_ascii() && b.is_ascii();
    if fold_ascii_case {
        a.to_ascii_lowercase() == b.to_ascii_lowercase()
    } else {
        a == b
    }
}
1039 | |
/// Options that tune how `Pattern::matches_with(..)` compares a pattern
/// against a string.
// NOTE(review): the `allow` below looks redundant now that `Copy` is derived.
#[allow(missing_copy_implementations)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct MatchOptions {
    /// When `true`, letters only match letters of the same case. At present
    /// only the upper/lower case relationship between ASCII characters is
    /// taken into account; Unicode support may be added in the future.
    pub case_sensitive: bool,

    /// When `true`, a path-component separator (e.g. `/` on Posix) in the
    /// input can only be matched by a literal `/` in the pattern, never by
    /// `*`, `?` or `[...]`.
    pub require_literal_separator: bool,

    /// When `true`, a path component that begins with `.` is only matched if
    /// the pattern contains that `.` literally; `*`, `?`, `**` and `[...]`
    /// will not match it. Such files are conventionally treated as hidden on
    /// Unix systems, so this is handy for skipping them when listing files.
    pub require_literal_leading_dot: bool,
}

impl MatchOptions {
    /// Builds the `MatchOptions` used by the functions that do not accept an
    /// explicit `MatchOptions` argument.
    ///
    /// The returned value is always:
    ///
    /// ```rust,ignore
    /// MatchOptions {
    ///     case_sensitive: true,
    ///     require_literal_separator: false,
    ///     require_literal_leading_dot: false
    /// }
    /// ```
    ///
    /// # Note
    /// This is not the same value as `default()`: `new()` sets
    /// `case_sensitive` to `true`, whereas `default()` leaves it `false`.
    // FIXME: Consider unifying the behavior with `default()` in a next major release.
    pub fn new() -> Self {
        MatchOptions {
            require_literal_leading_dot: false,
            require_literal_separator: false,
            case_sensitive: true,
        }
    }
}
1090 | |
// Unit tests for pattern parsing, pattern matching and filesystem globbing.
#[cfg(test)]
mod test {
    use super::{glob, MatchOptions, Pattern};
    use std::path::Path;

    /// Asserts, via `Pattern::matches`, that `pattern` matches every string in
    /// `hits` and none of the strings in `misses`.
    fn assert_matches(pattern: &str, hits: &[&str], misses: &[&str]) {
        let pat = Pattern::new(pattern).unwrap();
        for &text in hits {
            assert!(pat.matches(text), "{:?} should match {:?}", pattern, text);
        }
        for &text in misses {
            assert!(
                !pat.matches(text),
                "{:?} should not match {:?}",
                pattern,
                text
            );
        }
    }

    /// Asserts that compiling each pattern fails at the expected position.
    fn assert_error_positions(cases: &[(&str, usize)]) {
        for &(pattern, pos) in cases {
            assert_eq!(
                Pattern::new(pattern).unwrap_err().pos,
                pos,
                "unexpected error position for {:?}",
                pattern
            );
        }
    }

    // Patterns can be built with `str::parse`, and errors carry a position.
    #[test]
    fn test_pattern_from_str() {
        assert!("a*b".parse::<Pattern>().unwrap().matches("a_b"));
        assert_eq!("a/**b".parse::<Pattern>().unwrap_err().pos, 4);
    }

    #[test]
    fn test_wildcard_errors() {
        assert_error_positions(&[
            ("a/**b", 4),
            ("a/bc**", 3),
            ("a/*****", 4),
            ("a/b**c**d", 2),
            ("a**b", 0),
        ]);
    }

    #[test]
    fn test_unclosed_bracket_errors() {
        assert_error_positions(&[
            ("abc[def", 3),
            ("abc[!def", 3),
            ("abc[", 3),
            ("abc[!", 3),
            ("abc[d", 3),
            ("abc[!d", 3),
            ("abc[]", 3),
            ("abc[!]", 3),
        ]);
    }

    // `glob` surfaces pattern errors with the same positions as `Pattern::new`.
    #[test]
    fn test_glob_errors() {
        assert_eq!(glob("a/**b").err().unwrap().pos, 4);
        assert_eq!(glob("abc[def").err().unwrap().pos, 3);
    }

    // this test assumes that there is a /root directory and that
    // the user running this test is not root or otherwise doesn't
    // have permission to read its contents
    #[cfg(all(unix, not(target_os = "macos")))]
    #[test]
    fn test_iteration_errors() {
        use std::io;

        // GlobErrors shouldn't halt iteration
        let first = glob("/root/*")
            .unwrap()
            .next()
            .expect("the iterator should yield the read error");

        let err = first.err().expect("reading /root should fail");
        assert_eq!(err.path(), Path::new("/root"));
        assert_eq!(err.error().kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn test_absolute_pattern() {
        assert!(glob("/").unwrap().next().is_some());
        assert!(glob("//").unwrap().next().is_some());

        // assume that the filesystem is not empty!
        assert!(glob("/*").unwrap().next().is_some());

        #[cfg(not(windows))]
        fn win() {}

        #[cfg(windows)]
        fn win() {
            use std::env::current_dir;
            use std::path::Component;

            // check windows absolute paths with host/device components
            let cwd = current_dir().expect("the current directory should be available");
            // NOTE(review): this used to call `path.join("*")` and throw the
            // result away, so the globbed pattern has always been the bare
            // prefix. The dead call was removed; confirm whether globbing
            // `<prefix>*` was the actual intent.
            let root_with_device = match cwd.components().next().unwrap() {
                Component::Prefix(prefix_component) => {
                    Path::new(prefix_component.as_os_str()).to_path_buf()
                }
                _ => panic!("no prefix in this path"),
            };
            // FIXME (#9639): This needs to handle non-utf8 paths
            assert!(glob(root_with_device.to_str().unwrap())
                .unwrap()
                .next()
                .is_some());
        }
        win()
    }

    #[test]
    fn test_wildcards() {
        assert_matches("a*b", &["a_b"], &[]);
        assert_matches("a*b*c", &["abc", "a_b_c", "a___b___c"], &["abcd"]);
        assert_matches(
            "abc*abc*abc",
            &["abcabcabcabcabcabcabc"],
            &["abcabcabcabcabcabcabca"],
        );
        assert_matches(
            "a*a*a*a*a*a*a*a*a",
            &["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"],
            &[],
        );
        assert_matches("a*b[xyz]c*d", &["abxcdbxcddd"], &[]);
    }

    #[test]
    fn test_recursive_wildcards() {
        let needle_hits = [
            "some/needle.txt",
            "some/one/needle.txt",
            "some/one/two/needle.txt",
            "some/other/needle.txt",
        ];
        let needle_misses = ["some/other/notthis.txt"];
        assert_matches("some/**/needle.txt", &needle_hits, &needle_misses);

        // a single ** should be valid, for globs
        // Should accept anything
        let pat = Pattern::new("**").unwrap();
        assert!(pat.is_recursive);
        for &text in ["abcde", "", ".asdf", "/x/.asdf"].iter() {
            assert!(pat.matches(text), "\"**\" should match {:?}", text);
        }

        // collapse consecutive wildcards
        assert_matches("some/**/**/needle.txt", &needle_hits, &needle_misses);

        // ** can begin the pattern
        assert_matches("**/test", &["one/two/test", "one/test", "test"], &[]);

        // /** can begin the pattern
        assert_matches(
            "/**/test",
            &["/one/two/test", "/one/test", "/test"],
            &["/one/notthis", "/notthis"],
        );

        // Only start sub-patterns on start of path segment.
        assert_matches("**/.*", &[".abc", "abc/.abc"], &["ab.c", "abc/ab.c"]);
    }

    #[test]
    fn test_lots_of_files() {
        // this is a good test because it touches lots of differently named files
        let _ = glob("/*/*/*/*").unwrap().nth(10000);
    }

    #[test]
    fn test_range_pattern() {
        let digits = Pattern::new("a[0-9]b").unwrap();
        let non_digits = Pattern::new("a[!0-9]b").unwrap();
        for i in 0..10 {
            let text = format!("a{}b", i);
            assert!(digits.matches(&text));
            assert!(!non_digits.matches(&text));
        }
        assert!(!digits.matches("a_b"));
        assert!(non_digits.matches("a_b"));

        // The position of the range inside the brackets must not matter.
        let case_insensitive = MatchOptions {
            case_sensitive: false,
            ..MatchOptions::new()
        };
        for &p in ["[a-z123]", "[1a-z23]", "[123a-z]"].iter() {
            let pat = Pattern::new(p).unwrap();
            for c in "abcdefghijklmnopqrstuvwxyz".chars() {
                assert!(pat.matches(&c.to_string()));
            }
            for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() {
                assert!(pat.matches_with(&c.to_string(), case_insensitive));
            }
            for &digit in ["1", "2", "3"].iter() {
                assert!(pat.matches(digit));
            }
        }

        // A `-` that cannot form a range is an ordinary character.
        for &p in ["[abc-]", "[-abc]", "[a-c-]"].iter() {
            assert_matches(p, &["a", "b", "c", "-"], &["d"]);
        }

        // A reversed range matches nothing.
        assert_matches("[2-1]", &[], &["1", "2"]);

        assert_matches("[-]", &["-"], &[]);
        assert_matches("[!-]", &[], &["-"]);
    }

    #[test]
    fn test_pattern_matches() {
        assert_matches(
            "*hello.txt",
            &[
                "hello.txt",
                "gareth_says_hello.txt",
                "some/path/to/hello.txt",
                "some\\ path\\ to\\ hello.txt",
                "/an/absolute/path/to/hello.txt",
            ],
            &["hello.txt-and-then-some", "goodbye.txt"],
        );

        assert_matches(
            "*some/path/to/hello.txt",
            &["some/path/to/hello.txt", "a/bigger/some/path/to/hello.txt"],
            &[
                "some/path/to/hello.txt-and-then-some",
                "some/other/path/to/hello.txt",
            ],
        );
    }

    // Escaping must produce a pattern that matches the original text literally.
    #[test]
    fn test_pattern_escape() {
        let s = "_[_]_?_*_!_";
        let escaped = Pattern::escape(s);
        assert_eq!(escaped, "_[[]_[]]_[?]_[*]_!_");
        assert!(Pattern::new(&escaped).unwrap().matches(s));
    }

    #[test]
    fn test_pattern_matches_case_insensitive() {
        let pat = Pattern::new("aBcDeFg").unwrap();
        let options = MatchOptions {
            case_sensitive: false,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };

        for &text in ["aBcDeFg", "abcdefg", "ABCDEFG", "AbCdEfG"].iter() {
            assert!(pat.matches_with(text, options), "should match {:?}", text);
        }
    }

    #[test]
    fn test_pattern_matches_case_insensitive_range() {
        let pat_within = Pattern::new("[a]").unwrap();
        let pat_except = Pattern::new("[!a]").unwrap();

        let options_case_insensitive = MatchOptions {
            case_sensitive: false,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };
        let options_case_sensitive = MatchOptions {
            case_sensitive: true,
            ..options_case_insensitive
        };

        assert!(pat_within.matches_with("a", options_case_insensitive));
        assert!(pat_within.matches_with("A", options_case_insensitive));
        assert!(!pat_within.matches_with("A", options_case_sensitive));

        assert!(!pat_except.matches_with("a", options_case_insensitive));
        assert!(!pat_except.matches_with("A", options_case_insensitive));
        assert!(pat_except.matches_with("A", options_case_sensitive));
    }

    #[test]
    fn test_pattern_matches_require_literal_separator() {
        let options_require_literal = MatchOptions {
            case_sensitive: true,
            require_literal_separator: true,
            require_literal_leading_dot: false,
        };
        let options_not_require_literal = MatchOptions {
            require_literal_separator: false,
            ..options_require_literal
        };

        // (pattern, whether it matches "abc/def" when a literal `/` is required)
        let cases: [(&str, bool); 4] = [
            ("abc/def", true),
            ("abc?def", false),
            ("abc*def", false),
            ("abc[/]def", false),
        ];
        for &(pattern, matches_when_required) in cases.iter() {
            let pat = Pattern::new(pattern).unwrap();
            assert_eq!(
                pat.matches_with("abc/def", options_require_literal),
                matches_when_required,
                "pattern {:?} with a required literal separator",
                pattern
            );
            assert!(
                pat.matches_with("abc/def", options_not_require_literal),
                "pattern {:?} without a required literal separator",
                pattern
            );
        }
    }

    #[test]
    fn test_pattern_matches_require_literal_leading_dot() {
        let options_require_literal_leading_dot = MatchOptions {
            case_sensitive: true,
            require_literal_separator: false,
            require_literal_leading_dot: true,
        };
        let options_not_require_literal_leading_dot = MatchOptions {
            require_literal_leading_dot: false,
            ..options_require_literal_leading_dot
        };

        // (pattern, path, whether it matches when a literal leading dot is
        // required). Every case matches when the dot is not required.
        let cases: [(&str, &str, bool); 8] = [
            ("*.txt", ".hello.txt", false),
            (".*.*", ".hello.txt", true),
            ("aaa/bbb/*", "aaa/bbb/.ccc", false),
            ("aaa/bbb/*", "aaa/bbb/c.c.c.", true),
            ("aaa/bbb/.*", "aaa/bbb/.ccc", true),
            ("aaa/?bbb", "aaa/.bbb", false),
            ("aaa/[.]bbb", "aaa/.bbb", false),
            ("**/*", ".bbb", false),
        ];
        for &(pattern, path, matches_when_required) in cases.iter() {
            let pat = Pattern::new(pattern).unwrap();
            assert!(
                pat.matches_with(path, options_not_require_literal_leading_dot),
                "{:?} should match {:?} when the leading dot is not required",
                pattern,
                path
            );
            assert_eq!(
                pat.matches_with(path, options_require_literal_leading_dot),
                matches_when_required,
                "{:?} against {:?} when the leading dot is required",
                pattern,
                path
            );
        }
    }

    #[test]
    fn test_matches_path() {
        // on windows, (Path::new("a/b").as_str().unwrap() == "a\\b"), so this
        // tests that / and \ are considered equivalent on windows
        assert!(Pattern::new("a/b").unwrap().matches_path(Path::new("a/b")));
    }

    // A pattern assembled with `Path::join` must still compile.
    #[test]
    fn test_path_join() {
        let pattern = Path::new("one").join("**/*.rs");
        assert!(Pattern::new(pattern.to_str().unwrap()).is_ok());
    }
}
1502 |
Definitions
- Paths
- dir_patterns
- require_dir
- options
- todo
- scope
- glob
- glob_with
- check_windows_verbatim
- to_scope
- GlobError
- path
- error
- path
- error
- into_error
- description
- cause
- fmt
- PathWrapper
- path
- is_directory
- from_dir_entry
- from_path
- into_path
- Target
- deref
- as_ref
- GlobResult
- Item
- next
- PatternError
- pos
- msg
- description
- fmt
- Pattern
- original
- tokens
- is_recursive
- fmt
- Err
- from_str
- PatternToken
- Char
- AnyChar
- AnySequence
- AnyRecursiveSequence
- AnyWithin
- AnyExcept
- CharSpecifier
- SingleChar
- CharRange
- MatchResult
- Match
- SubPatternDoesntMatch
- EntirePatternDoesntMatch
- new
- escape
- matches
- matches_path
- matches_with
- matches_path_with
- as_str
- matches_from
- fill_todo
- pattern_as_str
- parse_char_specifiers
- in_char_specifiers
- chars_eq
- MatchOptions
- case_sensitive
- require_literal_separator
- require_literal_leading_dot
Learn Rust with the experts
Find out more