1use std::fmt;
2use std::hash;
3use std::iter;
4use std::ops::{Deref, DerefMut};
5use std::path::{is_separator, Path};
6use std::str;
7
8use regex;
9use regex::bytes::Regex;
10
11use crate::{new_regex, Candidate, Error, ErrorKind};
12
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` starts with a `/`
    /// and the pattern additionally matches a file path that is equal to
    /// `suffix` with that leading `/` removed. In other words, the literal
    /// must appear at the beginning of a file path or immediately following
    /// a `/`.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
53
54impl MatchStrategy {
55 /// Returns a matching strategy for the given pattern.
56 pub fn new(pat: &Glob) -> MatchStrategy {
57 if let Some(lit: String) = pat.basename_literal() {
58 MatchStrategy::BasenameLiteral(lit)
59 } else if let Some(lit: String) = pat.literal() {
60 MatchStrategy::Literal(lit)
61 } else if let Some(ext: String) = pat.ext() {
62 MatchStrategy::Extension(ext)
63 } else if let Some(prefix: String) = pat.prefix() {
64 MatchStrategy::Prefix(prefix)
65 } else if let Some((suffix: String, component: bool)) = pat.suffix() {
66 MatchStrategy::Suffix { suffix: suffix, component: component }
67 } else if let Some(ext: String) = pat.required_ext() {
68 MatchStrategy::RequiredExtension(ext)
69 } else {
70 MatchStrategy::Regex
71 }
72 }
73}
74
/// Glob represents a successfully parsed shell glob pattern.
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
///
/// Note that `PartialEq` and `Hash` are implemented by hand for this type
/// and only take the pattern string and the options into account.
#[derive(Clone, Debug, Eq)]
pub struct Glob {
    /// The original pattern string this glob was built from.
    glob: String,
    /// The regular expression string generated from `tokens`.
    re: String,
    /// The options this pattern was built with.
    opts: GlobOptions,
    /// The parsed token sequence of the pattern.
    tokens: Tokens,
}
86
87impl PartialEq for Glob {
88 fn eq(&self, other: &Glob) -> bool {
89 self.glob == other.glob && self.opts == other.opts
90 }
91}
92
93impl hash::Hash for Glob {
94 fn hash<H: hash::Hasher>(&self, state: &mut H) {
95 self.glob.hash(state);
96 self.opts.hash(state);
97 }
98}
99
100impl fmt::Display for Glob {
101 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102 self.glob.fmt(f)
103 }
104}
105
106impl str::FromStr for Glob {
107 type Err = Error;
108
109 fn from_str(glob: &str) -> Result<Self, Self::Err> {
110 Self::new(glob)
111 }
112}
113
/// A matcher for a single pattern.
///
/// Values of this type are produced by `Glob::compile_matcher`, which stores
/// a clone of the glob next to its compiled regex.
#[derive(Clone, Debug)]
pub struct GlobMatcher {
    /// The underlying pattern.
    pat: Glob,
    /// The pattern, as a compiled regex (the byte-oriented `regex::bytes`
    /// flavor).
    re: Regex,
}
122
123impl GlobMatcher {
124 /// Tests whether the given path matches this pattern or not.
125 pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
126 self.is_match_candidate(&Candidate::new(path.as_ref()))
127 }
128
129 /// Tests whether the given path matches this pattern or not.
130 pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
131 self.re.is_match(&path.path)
132 }
133
134 /// Returns the `Glob` used to compile this matcher.
135 pub fn glob(&self) -> &Glob {
136 &self.pat
137 }
138}
139
/// A strategic matcher for a single pattern.
///
/// This type only exists in test builds. It is produced by
/// `Glob::compile_strategic_matcher` and matches using the pattern's
/// `MatchStrategy`, consulting the regex only for the strategies that
/// require it.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The match strategy to use.
    strategy: MatchStrategy,
    /// The pattern, as a compiled regex.
    re: Regex,
}
149
#[cfg(test)]
impl GlobStrategic {
    /// Reports whether `path` is matched by this pattern.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Reports whether the prepared candidate is matched by this pattern,
    /// dispatching on the strategy chosen for it.
    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
        let byte_path = &*candidate.path;

        match &self.strategy {
            MatchStrategy::Literal(lit) => lit.as_bytes() == byte_path,
            MatchStrategy::BasenameLiteral(lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(pre) => {
                starts_with(pre.as_bytes(), byte_path)
            }
            MatchStrategy::Suffix { suffix, component } => {
                let suffix = suffix.as_bytes();
                // A component suffix also matches a path that is exactly
                // the suffix minus its first byte.
                (*component && byte_path == &suffix[1..])
                    || ends_with(suffix, byte_path)
            }
            MatchStrategy::RequiredExtension(ext) => {
                // The extension is only a pre-filter; the regex decides.
                ext.as_bytes() == &*candidate.ext
                    && self.re.is_match(byte_path)
            }
            MatchStrategy::Regex => self.re.is_match(byte_path),
        }
    }
}
186
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// The pattern is only parsed when `build` is called; until then the builder
/// just holds the pattern string and the options chosen so far.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern.
    opts: GlobOptions,
}
200
/// The set of switches that influence how a glob is parsed and translated
/// into a regex.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
}

impl GlobOptions {
    /// Returns the default options: case sensitive, separators are not
    /// required to be literal, and backslash escaping is on exactly when `\`
    /// is not a path separator on this platform.
    fn default() -> GlobOptions {
        let backslash_is_separator = is_separator('\\');
        GlobOptions {
            backslash_escape: !backslash_is_separator,
            literal_separator: false,
            case_insensitive: false,
        }
    }
}
222
223#[derive(Clone, Debug, Default, Eq, PartialEq)]
224struct Tokens(Vec<Token>);
225
226impl Deref for Tokens {
227 type Target = Vec<Token>;
228 fn deref(&self) -> &Vec<Token> {
229 &self.0
230 }
231}
232
233impl DerefMut for Tokens {
234 fn deref_mut(&mut self) -> &mut Vec<Token> {
235 &mut self.0
236 }
237}
238
/// A single unit of a parsed glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A character that must match itself.
    Literal(char),
    /// `?`: matches a single character (or a single non-`/` character when
    /// `literal_separator` is enabled).
    Any,
    /// `*`: matches any sequence (or any sequence without `/` when
    /// `literal_separator` is enabled).
    ZeroOrMore,
    /// A `**` (optionally followed by a separator) at the start of a
    /// pattern, e.g. `**` or `**/`.
    RecursivePrefix,
    /// A `/**` at the end of a pattern.
    RecursiveSuffix,
    /// A `/**/` in the middle of a pattern.
    RecursiveZeroOrMore,
    /// A character class such as `[a-z]` or `[!a-z]`. Each entry of `ranges`
    /// is an inclusive `(start, end)` pair; single characters are stored
    /// with `start == end`.
    Class { negated: bool, ranges: Vec<(char, char)> },
    /// A `{...}` group; each element is the token sequence of one
    /// alternative.
    Alternates(Vec<Tokens>),
}
250
251impl Glob {
252 /// Builds a new pattern with default options.
253 pub fn new(glob: &str) -> Result<Glob, Error> {
254 GlobBuilder::new(glob).build()
255 }
256
257 /// Returns a matcher for this pattern.
258 pub fn compile_matcher(&self) -> GlobMatcher {
259 let re =
260 new_regex(&self.re).expect("regex compilation shouldn't fail");
261 GlobMatcher { pat: self.clone(), re: re }
262 }
263
264 /// Returns a strategic matcher.
265 ///
266 /// This isn't exposed because it's not clear whether it's actually
267 /// faster than just running a regex for a *single* pattern. If it
268 /// is faster, then GlobMatcher should do it automatically.
269 #[cfg(test)]
270 fn compile_strategic_matcher(&self) -> GlobStrategic {
271 let strategy = MatchStrategy::new(self);
272 let re =
273 new_regex(&self.re).expect("regex compilation shouldn't fail");
274 GlobStrategic { strategy: strategy, re: re }
275 }
276
277 /// Returns the original glob pattern used to build this pattern.
278 pub fn glob(&self) -> &str {
279 &self.glob
280 }
281
282 /// Returns the regular expression string for this glob.
283 ///
284 /// Note that regular expressions for globs are intended to be matched on
285 /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
286 /// particular, globs are frequently used on file paths, where there is no
287 /// general guarantee that file paths are themselves valid UTF-8. As a
288 /// result, callers will need to ensure that they are using a regex API
289 /// that can match on arbitrary bytes. For example, the
290 /// [`regex`](https://crates.io/regex)
291 /// crate's
292 /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
293 /// API is not suitable for this since it matches on `&str`, but its
294 /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
295 /// API is suitable for this.
296 pub fn regex(&self) -> &str {
297 &self.re
298 }
299
300 /// Returns the pattern as a literal if and only if the pattern must match
301 /// an entire path exactly.
302 ///
303 /// The basic format of these patterns is `{literal}`.
304 fn literal(&self) -> Option<String> {
305 if self.opts.case_insensitive {
306 return None;
307 }
308 let mut lit = String::new();
309 for t in &*self.tokens {
310 match *t {
311 Token::Literal(c) => lit.push(c),
312 _ => return None,
313 }
314 }
315 if lit.is_empty() {
316 None
317 } else {
318 Some(lit)
319 }
320 }
321
322 /// Returns an extension if this pattern matches a file path if and only
323 /// if the file path has the extension returned.
324 ///
325 /// Note that this extension returned differs from the extension that
326 /// std::path::Path::extension returns. Namely, this extension includes
327 /// the '.'. Also, paths like `.rs` are considered to have an extension
328 /// of `.rs`.
329 fn ext(&self) -> Option<String> {
330 if self.opts.case_insensitive {
331 return None;
332 }
333 let start = match self.tokens.get(0) {
334 Some(&Token::RecursivePrefix) => 1,
335 Some(_) => 0,
336 _ => return None,
337 };
338 match self.tokens.get(start) {
339 Some(&Token::ZeroOrMore) => {
340 // If there was no recursive prefix, then we only permit
341 // `*` if `*` can match a `/`. For example, if `*` can't
342 // match `/`, then `*.c` doesn't match `foo/bar.c`.
343 if start == 0 && self.opts.literal_separator {
344 return None;
345 }
346 }
347 _ => return None,
348 }
349 match self.tokens.get(start + 1) {
350 Some(&Token::Literal('.')) => {}
351 _ => return None,
352 }
353 let mut lit = ".".to_string();
354 for t in self.tokens[start + 2..].iter() {
355 match *t {
356 Token::Literal('.') | Token::Literal('/') => return None,
357 Token::Literal(c) => lit.push(c),
358 _ => return None,
359 }
360 }
361 if lit.is_empty() {
362 None
363 } else {
364 Some(lit)
365 }
366 }
367
368 /// This is like `ext`, but returns an extension even if it isn't sufficient
369 /// to imply a match. Namely, if an extension is returned, then it is
370 /// necessary but not sufficient for a match.
371 fn required_ext(&self) -> Option<String> {
372 if self.opts.case_insensitive {
373 return None;
374 }
375 // We don't care at all about the beginning of this pattern. All we
376 // need to check for is if it ends with a literal of the form `.ext`.
377 let mut ext: Vec<char> = vec![]; // built in reverse
378 for t in self.tokens.iter().rev() {
379 match *t {
380 Token::Literal('/') => return None,
381 Token::Literal(c) => {
382 ext.push(c);
383 if c == '.' {
384 break;
385 }
386 }
387 _ => return None,
388 }
389 }
390 if ext.last() != Some(&'.') {
391 None
392 } else {
393 ext.reverse();
394 Some(ext.into_iter().collect())
395 }
396 }
397
398 /// Returns a literal prefix of this pattern if the entire pattern matches
399 /// if the literal prefix matches.
400 fn prefix(&self) -> Option<String> {
401 if self.opts.case_insensitive {
402 return None;
403 }
404 let (end, need_sep) = match self.tokens.last() {
405 Some(&Token::ZeroOrMore) => {
406 if self.opts.literal_separator {
407 // If a trailing `*` can't match a `/`, then we can't
408 // assume a match of the prefix corresponds to a match
409 // of the overall pattern. e.g., `foo/*` with
410 // `literal_separator` enabled matches `foo/bar` but not
411 // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
412 // literal prefix.
413 return None;
414 }
415 (self.tokens.len() - 1, false)
416 }
417 Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
418 _ => (self.tokens.len(), false),
419 };
420 let mut lit = String::new();
421 for t in &self.tokens[0..end] {
422 match *t {
423 Token::Literal(c) => lit.push(c),
424 _ => return None,
425 }
426 }
427 if need_sep {
428 lit.push('/');
429 }
430 if lit.is_empty() {
431 None
432 } else {
433 Some(lit)
434 }
435 }
436
437 /// Returns a literal suffix of this pattern if the entire pattern matches
438 /// if the literal suffix matches.
439 ///
440 /// If a literal suffix is returned and it must match either the entire
441 /// file path or be preceded by a `/`, then also return true. This happens
442 /// with a pattern like `**/foo/bar`. Namely, this pattern matches
443 /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
444 /// suffix returned is `/foo/bar` (but should match the entire path
445 /// `foo/bar`).
446 ///
447 /// When this returns true, the suffix literal is guaranteed to start with
448 /// a `/`.
449 fn suffix(&self) -> Option<(String, bool)> {
450 if self.opts.case_insensitive {
451 return None;
452 }
453 let mut lit = String::new();
454 let (start, entire) = match self.tokens.get(0) {
455 Some(&Token::RecursivePrefix) => {
456 // We only care if this follows a path component if the next
457 // token is a literal.
458 if let Some(&Token::Literal(_)) = self.tokens.get(1) {
459 lit.push('/');
460 (1, true)
461 } else {
462 (1, false)
463 }
464 }
465 _ => (0, false),
466 };
467 let start = match self.tokens.get(start) {
468 Some(&Token::ZeroOrMore) => {
469 // If literal_separator is enabled, then a `*` can't
470 // necessarily match everything, so reporting a suffix match
471 // as a match of the pattern would be a false positive.
472 if self.opts.literal_separator {
473 return None;
474 }
475 start + 1
476 }
477 _ => start,
478 };
479 for t in &self.tokens[start..] {
480 match *t {
481 Token::Literal(c) => lit.push(c),
482 _ => return None,
483 }
484 }
485 if lit.is_empty() || lit == "/" {
486 None
487 } else {
488 Some((lit, entire))
489 }
490 }
491
492 /// If this pattern only needs to inspect the basename of a file path,
493 /// then the tokens corresponding to only the basename match are returned.
494 ///
495 /// For example, given a pattern of `**/*.foo`, only the tokens
496 /// corresponding to `*.foo` are returned.
497 ///
498 /// Note that this will return None if any match of the basename tokens
499 /// doesn't correspond to a match of the entire pattern. For example, the
500 /// glob `foo` only matches when a file path has a basename of `foo`, but
501 /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
502 /// `foo` doesn't match `abc/foo`.
503 fn basename_tokens(&self) -> Option<&[Token]> {
504 if self.opts.case_insensitive {
505 return None;
506 }
507 let start = match self.tokens.get(0) {
508 Some(&Token::RecursivePrefix) => 1,
509 _ => {
510 // With nothing to gobble up the parent portion of a path,
511 // we can't assume that matching on only the basename is
512 // correct.
513 return None;
514 }
515 };
516 if self.tokens[start..].is_empty() {
517 return None;
518 }
519 for t in &self.tokens[start..] {
520 match *t {
521 Token::Literal('/') => return None,
522 Token::Literal(_) => {} // OK
523 Token::Any | Token::ZeroOrMore => {
524 if !self.opts.literal_separator {
525 // In this case, `*` and `?` can match a path
526 // separator, which means this could reach outside
527 // the basename.
528 return None;
529 }
530 }
531 Token::RecursivePrefix
532 | Token::RecursiveSuffix
533 | Token::RecursiveZeroOrMore => {
534 return None;
535 }
536 Token::Class { .. } | Token::Alternates(..) => {
537 // We *could* be a little smarter here, but either one
538 // of these is going to prevent our literal optimizations
539 // anyway, so give up.
540 return None;
541 }
542 }
543 }
544 Some(&self.tokens[start..])
545 }
546
547 /// Returns the pattern as a literal if and only if the pattern exclusively
548 /// matches the basename of a file path *and* is a literal.
549 ///
550 /// The basic format of these patterns is `**/{literal}`, where `{literal}`
551 /// does not contain a path separator.
552 fn basename_literal(&self) -> Option<String> {
553 let tokens = match self.basename_tokens() {
554 None => return None,
555 Some(tokens) => tokens,
556 };
557 let mut lit = String::new();
558 for t in tokens {
559 match *t {
560 Token::Literal(c) => lit.push(c),
561 _ => return None,
562 }
563 }
564 Some(lit)
565 }
566}
567
568impl<'a> GlobBuilder<'a> {
569 /// Create a new builder for the pattern given.
570 ///
571 /// The pattern is not compiled until `build` is called.
572 pub fn new(glob: &'a str) -> GlobBuilder<'a> {
573 GlobBuilder { glob: glob, opts: GlobOptions::default() }
574 }
575
576 /// Parses and builds the pattern.
577 pub fn build(&self) -> Result<Glob, Error> {
578 let mut p = Parser {
579 glob: &self.glob,
580 stack: vec![Tokens::default()],
581 chars: self.glob.chars().peekable(),
582 prev: None,
583 cur: None,
584 opts: &self.opts,
585 };
586 p.parse()?;
587 if p.stack.is_empty() {
588 Err(Error {
589 glob: Some(self.glob.to_string()),
590 kind: ErrorKind::UnopenedAlternates,
591 })
592 } else if p.stack.len() > 1 {
593 Err(Error {
594 glob: Some(self.glob.to_string()),
595 kind: ErrorKind::UnclosedAlternates,
596 })
597 } else {
598 let tokens = p.stack.pop().unwrap();
599 Ok(Glob {
600 glob: self.glob.to_string(),
601 re: tokens.to_regex_with(&self.opts),
602 opts: self.opts,
603 tokens: tokens,
604 })
605 }
606 }
607
608 /// Toggle whether the pattern matches case insensitively or not.
609 ///
610 /// This is disabled by default.
611 pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
612 self.opts.case_insensitive = yes;
613 self
614 }
615
616 /// Toggle whether a literal `/` is required to match a path separator.
617 ///
618 /// By default this is false: `*` and `?` will match `/`.
619 pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
620 self.opts.literal_separator = yes;
621 self
622 }
623
624 /// When enabled, a back slash (`\`) may be used to escape
625 /// special characters in a glob pattern. Additionally, this will
626 /// prevent `\` from being interpreted as a path separator on all
627 /// platforms.
628 ///
629 /// This is enabled by default on platforms where `\` is not a
630 /// path separator and disabled by default on platforms where `\`
631 /// is a path separator.
632 pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
633 self.opts.backslash_escape = yes;
634 self
635 }
636}
637
638impl Tokens {
639 /// Convert this pattern to a string that is guaranteed to be a valid
640 /// regular expression and will represent the matching semantics of this
641 /// glob pattern and the options given.
642 fn to_regex_with(&self, options: &GlobOptions) -> String {
643 let mut re = String::new();
644 re.push_str("(?-u)");
645 if options.case_insensitive {
646 re.push_str("(?i)");
647 }
648 re.push('^');
649 // Special case. If the entire glob is just `**`, then it should match
650 // everything.
651 if self.len() == 1 && self[0] == Token::RecursivePrefix {
652 re.push_str(".*");
653 re.push('$');
654 return re;
655 }
656 self.tokens_to_regex(options, &self, &mut re);
657 re.push('$');
658 re
659 }
660
661 fn tokens_to_regex(
662 &self,
663 options: &GlobOptions,
664 tokens: &[Token],
665 re: &mut String,
666 ) {
667 for tok in tokens {
668 match *tok {
669 Token::Literal(c) => {
670 re.push_str(&char_to_escaped_literal(c));
671 }
672 Token::Any => {
673 if options.literal_separator {
674 re.push_str("[^/]");
675 } else {
676 re.push_str(".");
677 }
678 }
679 Token::ZeroOrMore => {
680 if options.literal_separator {
681 re.push_str("[^/]*");
682 } else {
683 re.push_str(".*");
684 }
685 }
686 Token::RecursivePrefix => {
687 re.push_str("(?:/?|.*/)");
688 }
689 Token::RecursiveSuffix => {
690 re.push_str("/.*");
691 }
692 Token::RecursiveZeroOrMore => {
693 re.push_str("(?:/|/.*/)");
694 }
695 Token::Class { negated, ref ranges } => {
696 re.push('[');
697 if negated {
698 re.push('^');
699 }
700 for r in ranges {
701 if r.0 == r.1 {
702 // Not strictly necessary, but nicer to look at.
703 re.push_str(&char_to_escaped_literal(r.0));
704 } else {
705 re.push_str(&char_to_escaped_literal(r.0));
706 re.push('-');
707 re.push_str(&char_to_escaped_literal(r.1));
708 }
709 }
710 re.push(']');
711 }
712 Token::Alternates(ref patterns) => {
713 let mut parts = vec![];
714 for pat in patterns {
715 let mut altre = String::new();
716 self.tokens_to_regex(options, &pat, &mut altre);
717 if !altre.is_empty() {
718 parts.push(altre);
719 }
720 }
721
722 // It is possible to have an empty set in which case the
723 // resulting alternation '()' would be an error.
724 if !parts.is_empty() {
725 re.push('(');
726 re.push_str(&parts.join("|"));
727 re.push(')');
728 }
729 }
730 }
731 }
732 }
733}
734
735/// Convert a Unicode scalar value to an escaped string suitable for use as
736/// a literal in a non-Unicode regex.
737fn char_to_escaped_literal(c: char) -> String {
738 bytes_to_escaped_literal(&c.to_string().into_bytes())
739}
740
741/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
742/// code units are converted to their escaped form.
743fn bytes_to_escaped_literal(bs: &[u8]) -> String {
744 let mut s: String = String::with_capacity(bs.len());
745 for &b: u8 in bs {
746 if b <= 0x7F {
747 s.push_str(&regex::escape(&(b as char).to_string()));
748 } else {
749 s.push_str(&format!("\\x{:02x}", b));
750 }
751 }
752 s
753}
754
/// The state of a glob parser while it walks over a pattern string.
struct Parser<'a> {
    /// The full pattern being parsed; copied into errors for reporting.
    glob: &'a str,
    /// A stack of token sequences. Tokens are always pushed onto the top
    /// entry; additional entries are pushed while inside a `{...}` group.
    stack: Vec<Tokens>,
    /// The remaining characters of the pattern, with one character of
    /// lookahead.
    chars: iter::Peekable<str::Chars<'a>>,
    /// The character that was current before the most recent `bump`.
    prev: Option<char>,
    /// The character returned by the most recent `bump`.
    cur: Option<char>,
    /// The options controlling how the pattern is parsed.
    opts: &'a GlobOptions,
}
763
764impl<'a> Parser<'a> {
765 fn error(&self, kind: ErrorKind) -> Error {
766 Error { glob: Some(self.glob.to_string()), kind: kind }
767 }
768
769 fn parse(&mut self) -> Result<(), Error> {
770 while let Some(c) = self.bump() {
771 match c {
772 '?' => self.push_token(Token::Any)?,
773 '*' => self.parse_star()?,
774 '[' => self.parse_class()?,
775 '{' => self.push_alternate()?,
776 '}' => self.pop_alternate()?,
777 ',' => self.parse_comma()?,
778 '\\' => self.parse_backslash()?,
779 c => self.push_token(Token::Literal(c))?,
780 }
781 }
782 Ok(())
783 }
784
785 fn push_alternate(&mut self) -> Result<(), Error> {
786 if self.stack.len() > 1 {
787 return Err(self.error(ErrorKind::NestedAlternates));
788 }
789 Ok(self.stack.push(Tokens::default()))
790 }
791
792 fn pop_alternate(&mut self) -> Result<(), Error> {
793 let mut alts = vec![];
794 while self.stack.len() >= 2 {
795 alts.push(self.stack.pop().unwrap());
796 }
797 self.push_token(Token::Alternates(alts))
798 }
799
800 fn push_token(&mut self, tok: Token) -> Result<(), Error> {
801 if let Some(ref mut pat) = self.stack.last_mut() {
802 return Ok(pat.push(tok));
803 }
804 Err(self.error(ErrorKind::UnopenedAlternates))
805 }
806
807 fn pop_token(&mut self) -> Result<Token, Error> {
808 if let Some(ref mut pat) = self.stack.last_mut() {
809 return Ok(pat.pop().unwrap());
810 }
811 Err(self.error(ErrorKind::UnopenedAlternates))
812 }
813
814 fn have_tokens(&self) -> Result<bool, Error> {
815 match self.stack.last() {
816 None => Err(self.error(ErrorKind::UnopenedAlternates)),
817 Some(ref pat) => Ok(!pat.is_empty()),
818 }
819 }
820
821 fn parse_comma(&mut self) -> Result<(), Error> {
822 // If we aren't inside a group alternation, then don't
823 // treat commas specially. Otherwise, we need to start
824 // a new alternate.
825 if self.stack.len() <= 1 {
826 self.push_token(Token::Literal(','))
827 } else {
828 Ok(self.stack.push(Tokens::default()))
829 }
830 }
831
832 fn parse_backslash(&mut self) -> Result<(), Error> {
833 if self.opts.backslash_escape {
834 match self.bump() {
835 None => Err(self.error(ErrorKind::DanglingEscape)),
836 Some(c) => self.push_token(Token::Literal(c)),
837 }
838 } else if is_separator('\\') {
839 // Normalize all patterns to use / as a separator.
840 self.push_token(Token::Literal('/'))
841 } else {
842 self.push_token(Token::Literal('\\'))
843 }
844 }
845
846 fn parse_star(&mut self) -> Result<(), Error> {
847 let prev = self.prev;
848 if self.peek() != Some('*') {
849 self.push_token(Token::ZeroOrMore)?;
850 return Ok(());
851 }
852 assert!(self.bump() == Some('*'));
853 if !self.have_tokens()? {
854 if !self.peek().map_or(true, is_separator) {
855 self.push_token(Token::ZeroOrMore)?;
856 self.push_token(Token::ZeroOrMore)?;
857 } else {
858 self.push_token(Token::RecursivePrefix)?;
859 assert!(self.bump().map_or(true, is_separator));
860 }
861 return Ok(());
862 }
863
864 if !prev.map(is_separator).unwrap_or(false) {
865 if self.stack.len() <= 1
866 || (prev != Some(',') && prev != Some('{'))
867 {
868 self.push_token(Token::ZeroOrMore)?;
869 self.push_token(Token::ZeroOrMore)?;
870 return Ok(());
871 }
872 }
873 let is_suffix = match self.peek() {
874 None => {
875 assert!(self.bump().is_none());
876 true
877 }
878 Some(',') | Some('}') if self.stack.len() >= 2 => true,
879 Some(c) if is_separator(c) => {
880 assert!(self.bump().map(is_separator).unwrap_or(false));
881 false
882 }
883 _ => {
884 self.push_token(Token::ZeroOrMore)?;
885 self.push_token(Token::ZeroOrMore)?;
886 return Ok(());
887 }
888 };
889 match self.pop_token()? {
890 Token::RecursivePrefix => {
891 self.push_token(Token::RecursivePrefix)?;
892 }
893 Token::RecursiveSuffix => {
894 self.push_token(Token::RecursiveSuffix)?;
895 }
896 _ => {
897 if is_suffix {
898 self.push_token(Token::RecursiveSuffix)?;
899 } else {
900 self.push_token(Token::RecursiveZeroOrMore)?;
901 }
902 }
903 }
904 Ok(())
905 }
906
907 fn parse_class(&mut self) -> Result<(), Error> {
908 fn add_to_last_range(
909 glob: &str,
910 r: &mut (char, char),
911 add: char,
912 ) -> Result<(), Error> {
913 r.1 = add;
914 if r.1 < r.0 {
915 Err(Error {
916 glob: Some(glob.to_string()),
917 kind: ErrorKind::InvalidRange(r.0, r.1),
918 })
919 } else {
920 Ok(())
921 }
922 }
923 let mut ranges = vec![];
924 let negated = match self.chars.peek() {
925 Some(&'!') | Some(&'^') => {
926 let bump = self.bump();
927 assert!(bump == Some('!') || bump == Some('^'));
928 true
929 }
930 _ => false,
931 };
932 let mut first = true;
933 let mut in_range = false;
934 loop {
935 let c = match self.bump() {
936 Some(c) => c,
937 // The only way to successfully break this loop is to observe
938 // a ']'.
939 None => return Err(self.error(ErrorKind::UnclosedClass)),
940 };
941 match c {
942 ']' => {
943 if first {
944 ranges.push((']', ']'));
945 } else {
946 break;
947 }
948 }
949 '-' => {
950 if first {
951 ranges.push(('-', '-'));
952 } else if in_range {
953 // invariant: in_range is only set when there is
954 // already at least one character seen.
955 let r = ranges.last_mut().unwrap();
956 add_to_last_range(&self.glob, r, '-')?;
957 in_range = false;
958 } else {
959 assert!(!ranges.is_empty());
960 in_range = true;
961 }
962 }
963 c => {
964 if in_range {
965 // invariant: in_range is only set when there is
966 // already at least one character seen.
967 add_to_last_range(
968 &self.glob,
969 ranges.last_mut().unwrap(),
970 c,
971 )?;
972 } else {
973 ranges.push((c, c));
974 }
975 in_range = false;
976 }
977 }
978 first = false;
979 }
980 if in_range {
981 // Means that the last character in the class was a '-', so add
982 // it as a literal.
983 ranges.push(('-', '-'));
984 }
985 self.push_token(Token::Class { negated: negated, ranges: ranges })
986 }
987
988 fn bump(&mut self) -> Option<char> {
989 self.prev = self.cur;
990 self.cur = self.chars.next();
991 self.cur
992 }
993
994 fn peek(&mut self) -> Option<char> {
995 self.chars.peek().map(|&ch| ch)
996 }
997}
998
/// Reports whether `haystack` begins with the bytes of `needle`.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
1003
/// Reports whether `haystack` ends with the bytes of `needle`.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1011
1012#[cfg(test)]
1013mod tests {
1014 use super::Token::*;
1015 use super::{Glob, GlobBuilder, Token};
1016 use crate::{ErrorKind, GlobSetBuilder};
1017
// Optional overrides for the builder settings used by the test macros. A
// field left as `None` keeps the builder's default for that setting.
#[derive(Clone, Copy, Debug, Default)]
struct Options {
    // Override for `GlobBuilder::case_insensitive`.
    casei: Option<bool>,
    // Override for `GlobBuilder::literal_separator`.
    litsep: Option<bool>,
    // Override for `GlobBuilder::backslash_escape`.
    bsesc: Option<bool>,
}
1024
// Defines a test named `$name` asserting that parsing `$pat` with default
// options succeeds and yields exactly the token list `$tokens`.
macro_rules! syntax {
    ($name:ident, $pat:expr, $tokens:expr) => {
        #[test]
        fn $name() {
            let pat = Glob::new($pat).unwrap();
            assert_eq!($tokens, pat.tokens.0);
        }
    };
}
1034
// Defines a test named `$name` asserting that parsing `$pat` with default
// options fails and that the error's kind equals `$err`.
macro_rules! syntaxerr {
    ($name:ident, $pat:expr, $err:expr) => {
        #[test]
        fn $name() {
            let err = Glob::new($pat).unwrap_err();
            assert_eq!(&$err, err.kind());
        }
    };
}
1044
// Defines a test named `$name` asserting that `$pat` translates to the regex
// `$re`. The expected string is given without the leading `(?-u)`, which the
// assertion prepends. The three-argument form uses `Options::default()`.
macro_rules! toregex {
    ($name:ident, $pat:expr, $re:expr) => {
        toregex!($name, $pat, $re, Options::default());
    };
    ($name:ident, $pat:expr, $re:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            // Only apply the settings that were explicitly overridden.
            if let Some(casei) = $options.casei {
                builder.case_insensitive(casei);
            }
            if let Some(litsep) = $options.litsep {
                builder.literal_separator(litsep);
            }
            if let Some(bsesc) = $options.bsesc {
                builder.backslash_escape(bsesc);
            }
            let pat = builder.build().unwrap();
            assert_eq!(format!("(?-u){}", $re), pat.regex());
        }
    };
}
1067
// Defines a test named `$name` asserting that `$pat` matches `$path`. The
// same path is checked against the regex matcher, the strategic matcher and
// a glob set containing only this pattern. The three-argument form uses
// `Options::default()`.
macro_rules! matches {
    ($name:ident, $pat:expr, $path:expr) => {
        matches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            // Only apply the settings that were explicitly overridden.
            if let Some(casei) = $options.casei {
                builder.case_insensitive(casei);
            }
            if let Some(litsep) = $options.litsep {
                builder.literal_separator(litsep);
            }
            if let Some(bsesc) = $options.bsesc {
                builder.backslash_escape(bsesc);
            }
            let pat = builder.build().unwrap();
            let matcher = pat.compile_matcher();
            let strategic = pat.compile_strategic_matcher();
            let set = GlobSetBuilder::new().add(pat).build().unwrap();
            assert!(matcher.is_match($path));
            assert!(strategic.is_match($path));
            assert!(set.is_match($path));
        }
    };
}
1095
// Defines a test named `$name` asserting that `$pat` does NOT match `$path`.
// The same path is checked against the regex matcher, the strategic matcher
// and a glob set containing only this pattern. The three-argument form uses
// `Options::default()`.
macro_rules! nmatches {
    ($name:ident, $pat:expr, $path:expr) => {
        nmatches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            // Only apply the settings that were explicitly overridden.
            if let Some(casei) = $options.casei {
                builder.case_insensitive(casei);
            }
            if let Some(litsep) = $options.litsep {
                builder.literal_separator(litsep);
            }
            if let Some(bsesc) = $options.bsesc {
                builder.backslash_escape(bsesc);
            }
            let pat = builder.build().unwrap();
            let matcher = pat.compile_matcher();
            let strategic = pat.compile_strategic_matcher();
            let set = GlobSetBuilder::new().add(pat).build().unwrap();
            assert!(!matcher.is_match($path));
            assert!(!strategic.is_match($path));
            assert!(!set.is_match($path));
        }
    };
}
1123
/// Copies `string` into a newly allocated `String`; keeps the expected
/// values in the test tables short.
fn s(string: &str) -> String {
    String::from(string)
}
1127
1128 fn class(s: char, e: char) -> Token {
1129 Class { negated: false, ranges: vec![(s, e)] }
1130 }
1131
1132 fn classn(s: char, e: char) -> Token {
1133 Class { negated: true, ranges: vec![(s, e)] }
1134 }
1135
1136 fn rclass(ranges: &[(char, char)]) -> Token {
1137 Class { negated: false, ranges: ranges.to_vec() }
1138 }
1139
1140 fn rclassn(ranges: &[(char, char)]) -> Token {
1141 Class { negated: true, ranges: ranges.to_vec() }
1142 }
1143
1144 syntax!(literal1, "a", vec![Literal('a')]);
1145 syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
1146 syntax!(any1, "?", vec![Any]);
1147 syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
1148 syntax!(seq1, "*", vec![ZeroOrMore]);
1149 syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
1150 syntax!(
1151 seq3,
1152 "*a*b*",
1153 vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
1154 );
1155 syntax!(rseq1, "**", vec![RecursivePrefix]);
1156 syntax!(rseq2, "**/", vec![RecursivePrefix]);
1157 syntax!(rseq3, "/**", vec![RecursiveSuffix]);
1158 syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
1159 syntax!(
1160 rseq5,
1161 "a/**/b",
1162 vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
1163 );
1164 syntax!(cls1, "[a]", vec![class('a', 'a')]);
1165 syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
1166 syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
1167 syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
1168 syntax!(cls5, "[-]", vec![class('-', '-')]);
1169 syntax!(cls6, "[]]", vec![class(']', ']')]);
1170 syntax!(cls7, "[*]", vec![class('*', '*')]);
1171 syntax!(cls8, "[!!]", vec![classn('!', '!')]);
1172 syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
1173 syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
1174 syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
1175 syntax!(
1176 cls12,
1177 "[-a-z-]",
1178 vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
1179 );
1180 syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
1181 syntax!(cls14, "[--z]", vec![class('-', 'z')]);
1182 syntax!(cls15, "[ --]", vec![class(' ', '-')]);
1183 syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
1184 syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
1185 syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
1186 syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
1187 syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
1188 syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1189
1190 syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
1191 syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
1192 syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
1193 syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
1194 syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
1195 syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1196
1197 const CASEI: Options =
1198 Options { casei: Some(true), litsep: None, bsesc: None };
1199 const SLASHLIT: Options =
1200 Options { casei: None, litsep: Some(true), bsesc: None };
1201 const NOBSESC: Options =
1202 Options { casei: None, litsep: None, bsesc: Some(false) };
1203 const BSESC: Options =
1204 Options { casei: None, litsep: None, bsesc: Some(true) };
1205
1206 toregex!(re_casei, "a", "(?i)^a$", &CASEI);
1207
1208 toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
1209 toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);
1210
1211 toregex!(re1, "a", "^a$");
1212 toregex!(re2, "?", "^.$");
1213 toregex!(re3, "*", "^.*$");
1214 toregex!(re4, "a?", "^a.$");
1215 toregex!(re5, "?a", "^.a$");
1216 toregex!(re6, "a*", "^a.*$");
1217 toregex!(re7, "*a", "^.*a$");
1218 toregex!(re8, "[*]", r"^[\*]$");
1219 toregex!(re9, "[+]", r"^[\+]$");
1220 toregex!(re10, "+", r"^\+$");
1221 toregex!(re11, "☃", r"^\xe2\x98\x83$");
1222 toregex!(re12, "**", r"^.*$");
1223 toregex!(re13, "**/", r"^.*$");
1224 toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
1225 toregex!(re15, "**/**", r"^.*$");
1226 toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
1227 toregex!(re17, "**/**/**", r"^.*$");
1228 toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
1229 toregex!(re19, "a/**", r"^a/.*$");
1230 toregex!(re20, "a/**/**", r"^a/.*$");
1231 toregex!(re21, "a/**/**/**", r"^a/.*$");
1232 toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
1233 toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
1234 toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
1235 toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
1236 toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
1237 toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
1238 toregex!(re28, "a**", r"^a.*.*$");
1239 toregex!(re29, "**a", r"^.*.*a$");
1240 toregex!(re30, "a**b", r"^a.*.*b$");
1241 toregex!(re31, "***", r"^.*.*.*$");
1242 toregex!(re32, "/a**", r"^/a.*.*$");
1243 toregex!(re33, "/**a", r"^/.*.*a$");
1244 toregex!(re34, "/a**b", r"^/a.*.*b$");
1245
1246 matches!(match1, "a", "a");
1247 matches!(match2, "a*b", "a_b");
1248 matches!(match3, "a*b*c", "abc");
1249 matches!(match4, "a*b*c", "a_b_c");
1250 matches!(match5, "a*b*c", "a___b___c");
1251 matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
1252 matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1253 matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
1254 matches!(match9, "*.rs", ".rs");
1255 matches!(match10, "☃", "☃");
1256
1257 matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
1258 matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
1259 matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
1260 matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
1261 matches!(matchrec5, "**", "abcde");
1262 matches!(matchrec6, "**", "");
1263 matches!(matchrec7, "**", ".asdf");
1264 matches!(matchrec8, "**", "/x/.asdf");
1265 matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
1266 matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
1267 matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
1268 matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
1269 matches!(matchrec13, "**/test", "one/two/test");
1270 matches!(matchrec14, "**/test", "one/test");
1271 matches!(matchrec15, "**/test", "test");
1272 matches!(matchrec16, "/**/test", "/one/two/test");
1273 matches!(matchrec17, "/**/test", "/one/test");
1274 matches!(matchrec18, "/**/test", "/test");
1275 matches!(matchrec19, "**/.*", ".abc");
1276 matches!(matchrec20, "**/.*", "abc/.abc");
1277 matches!(matchrec21, "**/foo/bar", "foo/bar");
1278 matches!(matchrec22, ".*/**", ".abc/abc");
1279 matches!(matchrec23, "test/**", "test/");
1280 matches!(matchrec24, "test/**", "test/one");
1281 matches!(matchrec25, "test/**", "test/one/two");
1282 matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
1283
1284 matches!(matchrange1, "a[0-9]b", "a0b");
1285 matches!(matchrange2, "a[0-9]b", "a9b");
1286 matches!(matchrange3, "a[!0-9]b", "a_b");
1287 matches!(matchrange4, "[a-z123]", "1");
1288 matches!(matchrange5, "[1a-z23]", "1");
1289 matches!(matchrange6, "[123a-z]", "1");
1290 matches!(matchrange7, "[abc-]", "-");
1291 matches!(matchrange8, "[-abc]", "-");
1292 matches!(matchrange9, "[-a-c]", "b");
1293 matches!(matchrange10, "[a-c-]", "b");
1294 matches!(matchrange11, "[-]", "-");
1295 matches!(matchrange12, "a[^0-9]b", "a_b");
1296
1297 matches!(matchpat1, "*hello.txt", "hello.txt");
1298 matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
1299 matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
1300 matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
1301 matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
1302 matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
1303 matches!(
1304 matchpat7,
1305 "*some/path/to/hello.txt",
1306 "a/bigger/some/path/to/hello.txt"
1307 );
1308
1309 matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
1310
1311 matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
1312 matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
1313 matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
1314 matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1315
1316 matches!(matchalt1, "a,b", "a,b");
1317 matches!(matchalt2, ",", ",");
1318 matches!(matchalt3, "{a,b}", "a");
1319 matches!(matchalt4, "{a,b}", "b");
1320 matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
1321 matches!(matchalt6, "{**/src/**,foo}", "foo");
1322 matches!(matchalt7, "{[}],foo}", "}");
1323 matches!(matchalt8, "{foo}", "foo");
1324 matches!(matchalt9, "{}", "");
1325 matches!(matchalt10, "{,}", "");
1326 matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
1327 matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
1328 matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
1329
1330 matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
1331 #[cfg(unix)]
1332 nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
1333 #[cfg(not(unix))]
1334 nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
1335 nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
1336 matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
1337 #[cfg(unix)]
1338 nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1339 #[cfg(not(unix))]
1340 matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1341
1342 matches!(matchbackslash1, "\\[", "[", BSESC);
1343 matches!(matchbackslash2, "\\?", "?", BSESC);
1344 matches!(matchbackslash3, "\\*", "*", BSESC);
1345 matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
1346 matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
1347 matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
1348 #[cfg(unix)]
1349 matches!(matchbackslash7, "\\a", "a");
1350 #[cfg(not(unix))]
1351 matches!(matchbackslash8, "\\a", "/a");
1352
1353 nmatches!(matchnot1, "a*b*c", "abcd");
1354 nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
1355 nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
1356 nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
1357 nmatches!(matchnot5, "/**/test", "test");
1358 nmatches!(matchnot6, "/**/test", "/one/notthis");
1359 nmatches!(matchnot7, "/**/test", "/notthis");
1360 nmatches!(matchnot8, "**/.*", "ab.c");
1361 nmatches!(matchnot9, "**/.*", "abc/ab.c");
1362 nmatches!(matchnot10, ".*/**", "a.bc");
1363 nmatches!(matchnot11, ".*/**", "abc/a.bc");
1364 nmatches!(matchnot12, "a[0-9]b", "a_b");
1365 nmatches!(matchnot13, "a[!0-9]b", "a0b");
1366 nmatches!(matchnot14, "a[!0-9]b", "a9b");
1367 nmatches!(matchnot15, "[!-]", "-");
1368 nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
1369 nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
1370 nmatches!(
1371 matchnot18,
1372 "*some/path/to/hello.txt",
1373 "some/path/to/hello.txt-and-then-some"
1374 );
1375 nmatches!(
1376 matchnot19,
1377 "*some/path/to/hello.txt",
1378 "some/other/path/to/hello.txt"
1379 );
1380 nmatches!(matchnot20, "a", "foo/a");
1381 nmatches!(matchnot21, "./foo", "foo");
1382 nmatches!(matchnot22, "**/foo", "foofoo");
1383 nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
1384 nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
1385 nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
1386 nmatches!(
1387 matchnot26,
1388 "**/m4/ltoptions.m4",
1389 "csharp/src/packages/repositories.config",
1390 SLASHLIT
1391 );
1392 nmatches!(matchnot27, "a[^0-9]b", "a0b");
1393 nmatches!(matchnot28, "a[^0-9]b", "a9b");
1394 nmatches!(matchnot29, "[^-]", "-");
1395 nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
1396 nmatches!(
1397 matchrec31,
1398 "some/*/needle.txt",
1399 "some/one/two/needle.txt",
1400 SLASHLIT
1401 );
1402 nmatches!(
1403 matchrec32,
1404 "some/*/needle.txt",
1405 "some/one/two/three/needle.txt",
1406 SLASHLIT
1407 );
1408 nmatches!(matchrec33, ".*/**", ".abc");
1409 nmatches!(matchrec34, "foo/**", "foo");
1410
/// Generates a `#[test]` named `$name` that builds the glob `$pat` and
/// asserts that calling the method `$which` on it yields `$expect`.
///
/// When no options expression is given, `Options::default()` is used.
macro_rules! extract {
    // Short form: forward to the full form with default options.
    ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
        extract!($which, $name, $pat, $expect, Options::default());
    };
    ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
        #[test]
        fn $name() {
            let opts = $options;
            let mut gb = GlobBuilder::new($pat);
            // Only settings that are explicitly `Some(..)` override the
            // builder's own defaults.
            if let Some(flag) = opts.casei {
                gb.case_insensitive(flag);
            }
            if let Some(flag) = opts.litsep {
                gb.literal_separator(flag);
            }
            if let Some(flag) = opts.bsesc {
                gb.backslash_escape(flag);
            }
            let glob = gb.build().unwrap();
            assert_eq!($expect, glob.$which());
        }
    };
}
1433
/// Shorthand for `extract!` with `literal` as the method under test.
macro_rules! literal {
    ($($args:tt)*) => {
        extract!(literal, $($args)*);
    };
}
1437
/// Shorthand for `extract!` with `basename_tokens` as the method under test.
macro_rules! basetokens {
    ($($args:tt)*) => {
        extract!(basename_tokens, $($args)*);
    };
}
1441
/// Shorthand for `extract!` with `ext` as the method under test.
macro_rules! ext {
    ($($args:tt)*) => {
        extract!(ext, $($args)*);
    };
}
1445
/// Shorthand for `extract!` with `required_ext` as the method under test.
macro_rules! required_ext {
    ($($args:tt)*) => {
        extract!(required_ext, $($args)*);
    };
}
1449
/// Shorthand for `extract!` with `prefix` as the method under test.
macro_rules! prefix {
    ($($args:tt)*) => {
        extract!(prefix, $($args)*);
    };
}
1453
/// Shorthand for `extract!` with `suffix` as the method under test.
macro_rules! suffix {
    ($($args:tt)*) => {
        extract!(suffix, $($args)*);
    };
}
1457
/// Shorthand for `extract!` with `basename_literal` as the method under test.
macro_rules! baseliteral {
    ($($args:tt)*) => {
        extract!(basename_literal, $($args)*);
    };
}
1461
1462 literal!(extract_lit1, "foo", Some(s("foo")));
1463 literal!(extract_lit2, "foo", None, CASEI);
1464 literal!(extract_lit3, "/foo", Some(s("/foo")));
1465 literal!(extract_lit4, "/foo/", Some(s("/foo/")));
1466 literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
1467 literal!(extract_lit6, "*.foo", None);
1468 literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
1469 literal!(extract_lit8, "**/foo/bar", None);
1470
1471 basetokens!(
1472 extract_basetoks1,
1473 "**/foo",
1474 Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
1475 );
1476 basetokens!(extract_basetoks2, "**/foo", None, CASEI);
1477 basetokens!(
1478 extract_basetoks3,
1479 "**/foo",
1480 Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
1481 SLASHLIT
1482 );
1483 basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
1484 basetokens!(extract_basetoks5, "*foo", None);
1485 basetokens!(extract_basetoks6, "**/fo*o", None);
1486 basetokens!(
1487 extract_basetoks7,
1488 "**/fo*o",
1489 Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
1490 SLASHLIT
1491 );
1492
1493 ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
1494 ext!(extract_ext2, "**/*.rs.bak", None);
1495 ext!(extract_ext3, "*.rs", Some(s(".rs")));
1496 ext!(extract_ext4, "a*.rs", None);
1497 ext!(extract_ext5, "/*.c", None);
1498 ext!(extract_ext6, "*.c", None, SLASHLIT);
1499 ext!(extract_ext7, "*.c", Some(s(".c")));
1500
1501 required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
1502 required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
1503 required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
1504 required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
1505 required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
1506 required_ext!(extract_req_ext6, "./rs", None);
1507 required_ext!(extract_req_ext7, "foo", None);
1508 required_ext!(extract_req_ext8, ".foo/", None);
1509 required_ext!(extract_req_ext9, "foo/", None);
1510
1511 prefix!(extract_prefix1, "/foo", Some(s("/foo")));
1512 prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
1513 prefix!(extract_prefix3, "**/foo", None);
1514 prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
1515
1516 suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
1517 suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
1518 suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
1519 suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
1520 suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
1521 suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
1522 suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));
1523
1524 baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
1525 baseliteral!(extract_baselit2, "foo", None);
1526 baseliteral!(extract_baselit3, "*foo", None);
1527 baseliteral!(extract_baselit4, "*/foo", None);
1528}
1529