1/*!
2The globset crate provides cross platform single glob and glob set matching.
3
4Glob set matching is the process of matching one or more glob patterns against
5a single candidate path simultaneously, and returning all of the globs that
6matched. For example, given this set of globs:
7
8* `*.rs`
9* `src/lib.rs`
10* `src/**/foo.rs`
11
12and a path `src/bar/baz/foo.rs`, then the set would report the first and third
13globs as matching.
14
15# Example: one glob
16
17This example shows how to match a single glob against a single file path.
18
19```
20use globset::Glob;
21
22let glob = Glob::new("*.rs")?.compile_matcher();
23
24assert!(glob.is_match("foo.rs"));
25assert!(glob.is_match("foo/bar.rs"));
26assert!(!glob.is_match("Cargo.toml"));
27# Ok::<(), Box<dyn std::error::Error>>(())
28```
29
30# Example: configuring a glob matcher
31
32This example shows how to use a `GlobBuilder` to configure aspects of match
33semantics. In this example, we prevent wildcards from matching path separators.
34
35```
36use globset::GlobBuilder;
37
38let glob = GlobBuilder::new("*.rs")
39 .literal_separator(true).build()?.compile_matcher();
40
41assert!(glob.is_match("foo.rs"));
42assert!(!glob.is_match("foo/bar.rs")); // no longer matches
43assert!(!glob.is_match("Cargo.toml"));
44# Ok::<(), Box<dyn std::error::Error>>(())
45```
46
47# Example: match multiple globs at once
48
49This example shows how to match multiple glob patterns at once.
50
51```
52use globset::{Glob, GlobSetBuilder};
53
54let mut builder = GlobSetBuilder::new();
55// A GlobBuilder can be used to configure each glob's match semantics
56// independently.
57builder.add(Glob::new("*.rs")?);
58builder.add(Glob::new("src/lib.rs")?);
59builder.add(Glob::new("src/**/foo.rs")?);
60let set = builder.build()?;
61
62assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
63# Ok::<(), Box<dyn std::error::Error>>(())
64```
65
66# Syntax
67
68Standard Unix-style glob syntax is supported:
69
70* `?` matches any single character. (If the `literal_separator` option is
71 enabled, then `?` can never match a path separator.)
72* `*` matches zero or more characters. (If the `literal_separator` option is
73 enabled, then `*` can never match a path separator.)
74* `**` recursively matches directories but are only legal in three situations.
75 First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
76 all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
77 and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
78 <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
79 <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
80 Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
81 the pattern, then it matches zero or more directories. Using `**` anywhere
82 else is illegal (N.B. the glob `**` is allowed and means "match everything").
83* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
84 (N.B. Nesting `{...}` is not currently allowed.)
85* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
86 `[!ab]` to match any character except for `a` and `b`.
87* Metacharacters such as `*` and `?` can be escaped with character class
88 notation. e.g., `[*]` matches `*`.
89* When backslash escapes are enabled, a backslash (`\`) will escape all meta
90 characters in a glob. If it precedes a non-meta character, then the slash is
91 ignored. A `\\` will match a literal `\\`. Note that this mode is only
92 enabled on Unix platforms by default, but can be enabled on any platform
93 via the `backslash_escape` setting on `Glob`.
94
95A `GlobBuilder` can be used to prevent wildcards from matching path separators,
96or to enable case insensitive matching.
97*/
98
99#![deny(missing_docs)]
100
101use std::{
102 borrow::Cow,
103 panic::{RefUnwindSafe, UnwindSafe},
104 path::Path,
105 sync::Arc,
106};
107
108use {
109 aho_corasick::AhoCorasick,
110 bstr::{ByteSlice, ByteVec, B},
111 regex_automata::{
112 meta::Regex,
113 util::pool::{Pool, PoolGuard},
114 PatternSet,
115 },
116};
117
118use crate::{
119 glob::MatchStrategy,
120 pathutil::{file_name, file_name_ext, normalize_path},
121};
122
123pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
124
125mod fnv;
126mod glob;
127mod pathutil;
128
129#[cfg(feature = "serde1")]
130mod serde_impl;
131
132#[cfg(feature = "log")]
133macro_rules! debug {
134 ($($token:tt)*) => (::log::debug!($($token)*);)
135}
136
137#[cfg(not(feature = "log"))]
138macro_rules! debug {
139 ($($token:tt)*) => {};
140}
141
142/// Represents an error that can occur when parsing a glob pattern.
143#[derive(Clone, Debug, Eq, PartialEq)]
144pub struct Error {
145 /// The original glob provided by the caller.
146 glob: Option<String>,
147 /// The kind of error.
148 kind: ErrorKind,
149}
150
151/// The kind of error that can occur when parsing a glob pattern.
152#[derive(Clone, Debug, Eq, PartialEq)]
153pub enum ErrorKind {
154 /// **DEPRECATED**.
155 ///
156 /// This error used to occur for consistency with git's glob specification,
157 /// but the specification now accepts all uses of `**`. When `**` does not
158 /// appear adjacent to a path separator or at the beginning/end of a glob,
159 /// it is now treated as two consecutive `*` patterns. As such, this error
160 /// is no longer used.
161 InvalidRecursive,
162 /// Occurs when a character class (e.g., `[abc]`) is not closed.
163 UnclosedClass,
164 /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For
165 /// example, if the range starts with a lexicographically larger character
166 /// than it ends with.
167 InvalidRange(char, char),
168 /// Occurs when a `}` is found without a matching `{`.
169 UnopenedAlternates,
170 /// Occurs when a `{` is found without a matching `}`.
171 UnclosedAlternates,
172 /// Occurs when an alternating group is nested inside another alternating
173 /// group, e.g., `{{a,b},{c,d}}`.
174 NestedAlternates,
175 /// Occurs when an unescaped '\' is found at the end of a glob.
176 DanglingEscape,
177 /// An error associated with parsing or compiling a regex.
178 Regex(String),
179 /// Hints that destructuring should not be exhaustive.
180 ///
181 /// This enum may grow additional variants, so this makes sure clients
182 /// don't count on exhaustive matching. (Otherwise, adding a new variant
183 /// could break existing code.)
184 #[doc(hidden)]
185 __Nonexhaustive,
186}
187
188impl std::error::Error for Error {
189 fn description(&self) -> &str {
190 self.kind.description()
191 }
192}
193
194impl Error {
195 /// Return the glob that caused this error, if one exists.
196 pub fn glob(&self) -> Option<&str> {
197 self.glob.as_ref().map(|s: &String| &**s)
198 }
199
200 /// Return the kind of this error.
201 pub fn kind(&self) -> &ErrorKind {
202 &self.kind
203 }
204}
205
206impl ErrorKind {
207 fn description(&self) -> &str {
208 match *self {
209 ErrorKind::InvalidRecursive => {
210 "invalid use of **; must be one path component"
211 }
212 ErrorKind::UnclosedClass => {
213 "unclosed character class; missing ']'"
214 }
215 ErrorKind::InvalidRange(_, _) => "invalid character range",
216 ErrorKind::UnopenedAlternates => {
217 "unopened alternate group; missing '{' \
218 (maybe escape '}' with '[}]'?)"
219 }
220 ErrorKind::UnclosedAlternates => {
221 "unclosed alternate group; missing '}' \
222 (maybe escape '{' with '[{]'?)"
223 }
224 ErrorKind::NestedAlternates => {
225 "nested alternate groups are not allowed"
226 }
227 ErrorKind::DanglingEscape => "dangling '\\'",
228 ErrorKind::Regex(ref err) => err,
229 ErrorKind::__Nonexhaustive => unreachable!(),
230 }
231 }
232}
233
234impl std::fmt::Display for Error {
235 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
236 match self.glob {
237 None => self.kind.fmt(f),
238 Some(ref glob: &String) => {
239 write!(f, "error parsing glob '{}': {}", glob, self.kind)
240 }
241 }
242 }
243}
244
245impl std::fmt::Display for ErrorKind {
246 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
247 match *self {
248 ErrorKind::InvalidRecursive
249 | ErrorKind::UnclosedClass
250 | ErrorKind::UnopenedAlternates
251 | ErrorKind::UnclosedAlternates
252 | ErrorKind::NestedAlternates
253 | ErrorKind::DanglingEscape
254 | ErrorKind::Regex(_) => write!(f, "{}", self.description()),
255 ErrorKind::InvalidRange(s: char, e: char) => {
256 write!(f, "invalid range; '{}' > '{}'", s, e)
257 }
258 ErrorKind::__Nonexhaustive => unreachable!(),
259 }
260 }
261}
262
263fn new_regex(pat: &str) -> Result<Regex, Error> {
264 let syntax: Config = regex_automata::util::syntax::Config::new()
265 .utf8(false)
266 .dot_matches_new_line(yes:true);
267 let config: Config = Regex::config()
268 .utf8_empty(false)
269 .nfa_size_limit(Some(10 * (1 << 20)))
270 .hybrid_cache_capacity(limit:10 * (1 << 20));
271 Regex::builder().syntax(syntax).configure(config).build(pat).map_err(
272 |err: BuildError| Error {
273 glob: Some(pat.to_string()),
274 kind: ErrorKind::Regex(err.to_string()),
275 },
276 )
277}
278
279fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> {
280 let syntax: Config = regex_automata::util::syntax::Config::new()
281 .utf8(false)
282 .dot_matches_new_line(yes:true);
283 let config: Config = Regex::config()
284 .match_kind(regex_automata::MatchKind::All)
285 .utf8_empty(false)
286 .nfa_size_limit(Some(10 * (1 << 20)))
287 .hybrid_cache_capacity(limit:10 * (1 << 20));
288 Regex::builder()
289 .syntax(syntax)
290 .configure(config)
291 .build_many(&pats)
292 .map_err(|err: BuildError| Error {
293 glob: None,
294 kind: ErrorKind::Regex(err.to_string()),
295 })
296}
297
298/// GlobSet represents a group of globs that can be matched together in a
299/// single pass.
300#[derive(Clone, Debug)]
301pub struct GlobSet {
302 len: usize,
303 strats: Vec<GlobSetMatchStrategy>,
304}
305
306impl GlobSet {
307 /// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add
308 /// new patterns. Once all patterns have been added, `build` should be
309 /// called to produce a `GlobSet`, which can then be used for matching.
310 #[inline]
311 pub fn builder() -> GlobSetBuilder {
312 GlobSetBuilder::new()
313 }
314
315 /// Create an empty `GlobSet`. An empty set matches nothing.
316 #[inline]
317 pub fn empty() -> GlobSet {
318 GlobSet { len: 0, strats: vec![] }
319 }
320
321 /// Returns true if this set is empty, and therefore matches nothing.
322 #[inline]
323 pub fn is_empty(&self) -> bool {
324 self.len == 0
325 }
326
327 /// Returns the number of globs in this set.
328 #[inline]
329 pub fn len(&self) -> usize {
330 self.len
331 }
332
333 /// Returns true if any glob in this set matches the path given.
334 pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
335 self.is_match_candidate(&Candidate::new(path.as_ref()))
336 }
337
338 /// Returns true if any glob in this set matches the path given.
339 ///
340 /// This takes a Candidate as input, which can be used to amortize the
341 /// cost of preparing a path for matching.
342 pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
343 if self.is_empty() {
344 return false;
345 }
346 for strat in &self.strats {
347 if strat.is_match(path) {
348 return true;
349 }
350 }
351 false
352 }
353
354 /// Returns the sequence number of every glob pattern that matches the
355 /// given path.
356 pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
357 self.matches_candidate(&Candidate::new(path.as_ref()))
358 }
359
360 /// Returns the sequence number of every glob pattern that matches the
361 /// given path.
362 ///
363 /// This takes a Candidate as input, which can be used to amortize the
364 /// cost of preparing a path for matching.
365 pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
366 let mut into = vec![];
367 if self.is_empty() {
368 return into;
369 }
370 self.matches_candidate_into(path, &mut into);
371 into
372 }
373
374 /// Adds the sequence number of every glob pattern that matches the given
375 /// path to the vec given.
376 ///
377 /// `into` is cleared before matching begins, and contains the set of
378 /// sequence numbers (in ascending order) after matching ends. If no globs
379 /// were matched, then `into` will be empty.
380 pub fn matches_into<P: AsRef<Path>>(
381 &self,
382 path: P,
383 into: &mut Vec<usize>,
384 ) {
385 self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
386 }
387
388 /// Adds the sequence number of every glob pattern that matches the given
389 /// path to the vec given.
390 ///
391 /// `into` is cleared before matching begins, and contains the set of
392 /// sequence numbers (in ascending order) after matching ends. If no globs
393 /// were matched, then `into` will be empty.
394 ///
395 /// This takes a Candidate as input, which can be used to amortize the
396 /// cost of preparing a path for matching.
397 pub fn matches_candidate_into(
398 &self,
399 path: &Candidate<'_>,
400 into: &mut Vec<usize>,
401 ) {
402 into.clear();
403 if self.is_empty() {
404 return;
405 }
406 for strat in &self.strats {
407 strat.matches_into(path, into);
408 }
409 into.sort();
410 into.dedup();
411 }
412
413 fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
414 if pats.is_empty() {
415 return Ok(GlobSet { len: 0, strats: vec![] });
416 }
417 let mut lits = LiteralStrategy::new();
418 let mut base_lits = BasenameLiteralStrategy::new();
419 let mut exts = ExtensionStrategy::new();
420 let mut prefixes = MultiStrategyBuilder::new();
421 let mut suffixes = MultiStrategyBuilder::new();
422 let mut required_exts = RequiredExtensionStrategyBuilder::new();
423 let mut regexes = MultiStrategyBuilder::new();
424 for (i, p) in pats.iter().enumerate() {
425 match MatchStrategy::new(p) {
426 MatchStrategy::Literal(lit) => {
427 lits.add(i, lit);
428 }
429 MatchStrategy::BasenameLiteral(lit) => {
430 base_lits.add(i, lit);
431 }
432 MatchStrategy::Extension(ext) => {
433 exts.add(i, ext);
434 }
435 MatchStrategy::Prefix(prefix) => {
436 prefixes.add(i, prefix);
437 }
438 MatchStrategy::Suffix { suffix, component } => {
439 if component {
440 lits.add(i, suffix[1..].to_string());
441 }
442 suffixes.add(i, suffix);
443 }
444 MatchStrategy::RequiredExtension(ext) => {
445 required_exts.add(i, ext, p.regex().to_owned());
446 }
447 MatchStrategy::Regex => {
448 debug!("glob converted to regex: {:?}", p);
449 regexes.add(i, p.regex().to_owned());
450 }
451 }
452 }
453 debug!(
454 "built glob set; {} literals, {} basenames, {} extensions, \
455 {} prefixes, {} suffixes, {} required extensions, {} regexes",
456 lits.0.len(),
457 base_lits.0.len(),
458 exts.0.len(),
459 prefixes.literals.len(),
460 suffixes.literals.len(),
461 required_exts.0.len(),
462 regexes.literals.len()
463 );
464 Ok(GlobSet {
465 len: pats.len(),
466 strats: vec![
467 GlobSetMatchStrategy::Extension(exts),
468 GlobSetMatchStrategy::BasenameLiteral(base_lits),
469 GlobSetMatchStrategy::Literal(lits),
470 GlobSetMatchStrategy::Suffix(suffixes.suffix()),
471 GlobSetMatchStrategy::Prefix(prefixes.prefix()),
472 GlobSetMatchStrategy::RequiredExtension(
473 required_exts.build()?,
474 ),
475 GlobSetMatchStrategy::Regex(regexes.regex_set()?),
476 ],
477 })
478 }
479}
480
481impl Default for GlobSet {
482 /// Create a default empty GlobSet.
483 fn default() -> Self {
484 GlobSet::empty()
485 }
486}
487
488/// GlobSetBuilder builds a group of patterns that can be used to
489/// simultaneously match a file path.
490#[derive(Clone, Debug)]
491pub struct GlobSetBuilder {
492 pats: Vec<Glob>,
493}
494
495impl GlobSetBuilder {
496 /// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new
497 /// patterns. Once all patterns have been added, `build` should be called
498 /// to produce a [`GlobSet`], which can then be used for matching.
499 pub fn new() -> GlobSetBuilder {
500 GlobSetBuilder { pats: vec![] }
501 }
502
503 /// Builds a new matcher from all of the glob patterns added so far.
504 ///
505 /// Once a matcher is built, no new patterns can be added to it.
506 pub fn build(&self) -> Result<GlobSet, Error> {
507 GlobSet::new(&self.pats)
508 }
509
510 /// Add a new pattern to this set.
511 pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
512 self.pats.push(pat);
513 self
514 }
515}
516
517/// A candidate path for matching.
518///
519/// All glob matching in this crate operates on `Candidate` values.
520/// Constructing candidates has a very small cost associated with it, so
521/// callers may find it beneficial to amortize that cost when matching a single
522/// path against multiple globs or sets of globs.
523#[derive(Clone)]
524pub struct Candidate<'a> {
525 path: Cow<'a, [u8]>,
526 basename: Cow<'a, [u8]>,
527 ext: Cow<'a, [u8]>,
528}
529
530impl<'a> std::fmt::Debug for Candidate<'a> {
531 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
532 f&mut DebugStruct<'_, '_>.debug_struct("Candidate")
533 .field("path", &self.path.as_bstr())
534 .field("basename", &self.basename.as_bstr())
535 .field(name:"ext", &self.ext.as_bstr())
536 .finish()
537 }
538}
539
540impl<'a> Candidate<'a> {
541 /// Create a new candidate for matching from the given path.
542 pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
543 let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
544 let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
545 let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
546 Candidate { path, basename, ext }
547 }
548
549 fn path_prefix(&self, max: usize) -> &[u8] {
550 if self.path.len() <= max {
551 &*self.path
552 } else {
553 &self.path[..max]
554 }
555 }
556
557 fn path_suffix(&self, max: usize) -> &[u8] {
558 if self.path.len() <= max {
559 &*self.path
560 } else {
561 &self.path[self.path.len() - max..]
562 }
563 }
564}
565
566#[derive(Clone, Debug)]
567enum GlobSetMatchStrategy {
568 Literal(LiteralStrategy),
569 BasenameLiteral(BasenameLiteralStrategy),
570 Extension(ExtensionStrategy),
571 Prefix(PrefixStrategy),
572 Suffix(SuffixStrategy),
573 RequiredExtension(RequiredExtensionStrategy),
574 Regex(RegexSetStrategy),
575}
576
577impl GlobSetMatchStrategy {
578 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
579 use self::GlobSetMatchStrategy::*;
580 match *self {
581 Literal(ref s) => s.is_match(candidate),
582 BasenameLiteral(ref s) => s.is_match(candidate),
583 Extension(ref s) => s.is_match(candidate),
584 Prefix(ref s) => s.is_match(candidate),
585 Suffix(ref s) => s.is_match(candidate),
586 RequiredExtension(ref s) => s.is_match(candidate),
587 Regex(ref s) => s.is_match(candidate),
588 }
589 }
590
591 fn matches_into(
592 &self,
593 candidate: &Candidate<'_>,
594 matches: &mut Vec<usize>,
595 ) {
596 use self::GlobSetMatchStrategy::*;
597 match *self {
598 Literal(ref s) => s.matches_into(candidate, matches),
599 BasenameLiteral(ref s) => s.matches_into(candidate, matches),
600 Extension(ref s) => s.matches_into(candidate, matches),
601 Prefix(ref s) => s.matches_into(candidate, matches),
602 Suffix(ref s) => s.matches_into(candidate, matches),
603 RequiredExtension(ref s) => s.matches_into(candidate, matches),
604 Regex(ref s) => s.matches_into(candidate, matches),
605 }
606 }
607}
608
609#[derive(Clone, Debug)]
610struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
611
612impl LiteralStrategy {
613 fn new() -> LiteralStrategy {
614 LiteralStrategy(fnv::HashMap::default())
615 }
616
617 fn add(&mut self, global_index: usize, lit: String) {
618 self.0.entry(lit.into_bytes()).or_insert(default:vec![]).push(global_index);
619 }
620
621 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
622 self.0.contains_key(candidate.path.as_bytes())
623 }
624
625 #[inline(never)]
626 fn matches_into(
627 &self,
628 candidate: &Candidate<'_>,
629 matches: &mut Vec<usize>,
630 ) {
631 if let Some(hits: &Vec) = self.0.get(candidate.path.as_bytes()) {
632 matches.extend(iter:hits);
633 }
634 }
635}
636
637#[derive(Clone, Debug)]
638struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
639
640impl BasenameLiteralStrategy {
641 fn new() -> BasenameLiteralStrategy {
642 BasenameLiteralStrategy(fnv::HashMap::default())
643 }
644
645 fn add(&mut self, global_index: usize, lit: String) {
646 self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
647 }
648
649 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
650 if candidate.basename.is_empty() {
651 return false;
652 }
653 self.0.contains_key(candidate.basename.as_bytes())
654 }
655
656 #[inline(never)]
657 fn matches_into(
658 &self,
659 candidate: &Candidate<'_>,
660 matches: &mut Vec<usize>,
661 ) {
662 if candidate.basename.is_empty() {
663 return;
664 }
665 if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
666 matches.extend(hits);
667 }
668 }
669}
670
671#[derive(Clone, Debug)]
672struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
673
674impl ExtensionStrategy {
675 fn new() -> ExtensionStrategy {
676 ExtensionStrategy(fnv::HashMap::default())
677 }
678
679 fn add(&mut self, global_index: usize, ext: String) {
680 self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
681 }
682
683 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
684 if candidate.ext.is_empty() {
685 return false;
686 }
687 self.0.contains_key(candidate.ext.as_bytes())
688 }
689
690 #[inline(never)]
691 fn matches_into(
692 &self,
693 candidate: &Candidate<'_>,
694 matches: &mut Vec<usize>,
695 ) {
696 if candidate.ext.is_empty() {
697 return;
698 }
699 if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
700 matches.extend(hits);
701 }
702 }
703}
704
705#[derive(Clone, Debug)]
706struct PrefixStrategy {
707 matcher: AhoCorasick,
708 map: Vec<usize>,
709 longest: usize,
710}
711
712impl PrefixStrategy {
713 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
714 let path: &[u8] = candidate.path_prefix(self.longest);
715 for m: Match in self.matcher.find_overlapping_iter(input:path) {
716 if m.start() == 0 {
717 return true;
718 }
719 }
720 false
721 }
722
723 fn matches_into(
724 &self,
725 candidate: &Candidate<'_>,
726 matches: &mut Vec<usize>,
727 ) {
728 let path: &[u8] = candidate.path_prefix(self.longest);
729 for m: Match in self.matcher.find_overlapping_iter(input:path) {
730 if m.start() == 0 {
731 matches.push(self.map[m.pattern()]);
732 }
733 }
734 }
735}
736
737#[derive(Clone, Debug)]
738struct SuffixStrategy {
739 matcher: AhoCorasick,
740 map: Vec<usize>,
741 longest: usize,
742}
743
744impl SuffixStrategy {
745 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
746 let path: &[u8] = candidate.path_suffix(self.longest);
747 for m: Match in self.matcher.find_overlapping_iter(input:path) {
748 if m.end() == path.len() {
749 return true;
750 }
751 }
752 false
753 }
754
755 fn matches_into(
756 &self,
757 candidate: &Candidate<'_>,
758 matches: &mut Vec<usize>,
759 ) {
760 let path: &[u8] = candidate.path_suffix(self.longest);
761 for m: Match in self.matcher.find_overlapping_iter(input:path) {
762 if m.end() == path.len() {
763 matches.push(self.map[m.pattern()]);
764 }
765 }
766 }
767}
768
769#[derive(Clone, Debug)]
770struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>);
771
772impl RequiredExtensionStrategy {
773 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
774 if candidate.ext.is_empty() {
775 return false;
776 }
777 match self.0.get(candidate.ext.as_bytes()) {
778 None => false,
779 Some(regexes) => {
780 for &(_, ref re) in regexes {
781 if re.is_match(candidate.path.as_bytes()) {
782 return true;
783 }
784 }
785 false
786 }
787 }
788 }
789
790 #[inline(never)]
791 fn matches_into(
792 &self,
793 candidate: &Candidate<'_>,
794 matches: &mut Vec<usize>,
795 ) {
796 if candidate.ext.is_empty() {
797 return;
798 }
799 if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
800 for &(global_index, ref re) in regexes {
801 if re.is_match(candidate.path.as_bytes()) {
802 matches.push(global_index);
803 }
804 }
805 }
806 }
807}
808
809#[derive(Clone, Debug)]
810struct RegexSetStrategy {
811 matcher: Regex,
812 map: Vec<usize>,
813 // We use a pool of PatternSets to hopefully allocating a fresh one on each
814 // call.
815 //
816 // TODO: In the next semver breaking release, we should drop this pool and
817 // expose an opaque type that wraps PatternSet. Then callers can provide
818 // it to `matches_into` directly. Callers might still want to use a pool
819 // or similar to amortize allocation, but that matches the status quo and
820 // absolves us of needing to do it here.
821 patset: Arc<Pool<PatternSet, PatternSetPoolFn>>,
822}
823
824type PatternSetPoolFn =
825 Box<dyn Fn() -> PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>;
826
827impl RegexSetStrategy {
828 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
829 self.matcher.is_match(input:candidate.path.as_bytes())
830 }
831
832 fn matches_into(
833 &self,
834 candidate: &Candidate<'_>,
835 matches: &mut Vec<usize>,
836 ) {
837 let input: Input<'_> = regex_automata::Input::new(haystack:candidate.path.as_bytes());
838 let mut patset: PoolGuard<'_, PatternSet, …> = self.patset.get();
839 patset.clear();
840 self.matcher.which_overlapping_matches(&input, &mut patset);
841 for i: PatternID in patset.iter() {
842 matches.push(self.map[i]);
843 }
844 PoolGuard::put(this:patset);
845 }
846}
847
848#[derive(Clone, Debug)]
849struct MultiStrategyBuilder {
850 literals: Vec<String>,
851 map: Vec<usize>,
852 longest: usize,
853}
854
855impl MultiStrategyBuilder {
856 fn new() -> MultiStrategyBuilder {
857 MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
858 }
859
860 fn add(&mut self, global_index: usize, literal: String) {
861 if literal.len() > self.longest {
862 self.longest = literal.len();
863 }
864 self.map.push(global_index);
865 self.literals.push(literal);
866 }
867
868 fn prefix(self) -> PrefixStrategy {
869 PrefixStrategy {
870 matcher: AhoCorasick::new(&self.literals).unwrap(),
871 map: self.map,
872 longest: self.longest,
873 }
874 }
875
876 fn suffix(self) -> SuffixStrategy {
877 SuffixStrategy {
878 matcher: AhoCorasick::new(&self.literals).unwrap(),
879 map: self.map,
880 longest: self.longest,
881 }
882 }
883
884 fn regex_set(self) -> Result<RegexSetStrategy, Error> {
885 let matcher = new_regex_set(self.literals)?;
886 let pattern_len = matcher.pattern_len();
887 let create: PatternSetPoolFn =
888 Box::new(move || PatternSet::new(pattern_len));
889 Ok(RegexSetStrategy {
890 matcher,
891 map: self.map,
892 patset: Arc::new(Pool::new(create)),
893 })
894 }
895}
896
897#[derive(Clone, Debug)]
898struct RequiredExtensionStrategyBuilder(
899 fnv::HashMap<Vec<u8>, Vec<(usize, String)>>,
900);
901
902impl RequiredExtensionStrategyBuilder {
903 fn new() -> RequiredExtensionStrategyBuilder {
904 RequiredExtensionStrategyBuilder(fnv::HashMap::default())
905 }
906
907 fn add(&mut self, global_index: usize, ext: String, regex: String) {
908 self.0
909 .entry(ext.into_bytes())
910 .or_insert(default:vec![])
911 .push((global_index, regex));
912 }
913
914 fn build(self) -> Result<RequiredExtensionStrategy, Error> {
915 let mut exts: HashMap, Vec<(usize, …)>, …> = fnv::HashMap::default();
916 for (ext: Vec, regexes: Vec<(usize, String)>) in self.0.into_iter() {
917 exts.insert(k:ext.clone(), v:vec![]);
918 for (global_index: usize, regex: String) in regexes {
919 let compiled: Regex = new_regex(&regex)?;
920 exts.get_mut(&ext).unwrap().push((global_index, compiled));
921 }
922 }
923 Ok(RequiredExtensionStrategy(exts))
924 }
925}
926
927/// Escape meta-characters within the given glob pattern.
928///
929/// The escaping works by surrounding meta-characters with brackets. For
930/// example, `*` becomes `[*]`.
931pub fn escape(s: &str) -> String {
932 let mut escaped: String = String::with_capacity(s.len());
933 for c: char in s.chars() {
934 match c {
935 // note that ! does not need escaping because it is only special
936 // inside brackets
937 '?' | '*' | '[' | ']' => {
938 escaped.push(ch:'[');
939 escaped.push(ch:c);
940 escaped.push(ch:']');
941 }
942 c: char => {
943 escaped.push(ch:c);
944 }
945 }
946 }
947 escaped
948}
949
950#[cfg(test)]
951mod tests {
952 use crate::glob::Glob;
953
954 use super::{GlobSet, GlobSetBuilder};
955
956 #[test]
957 fn set_works() {
958 let mut builder = GlobSetBuilder::new();
959 builder.add(Glob::new("src/**/*.rs").unwrap());
960 builder.add(Glob::new("*.c").unwrap());
961 builder.add(Glob::new("src/lib.rs").unwrap());
962 let set = builder.build().unwrap();
963
964 assert!(set.is_match("foo.c"));
965 assert!(set.is_match("src/foo.c"));
966 assert!(!set.is_match("foo.rs"));
967 assert!(!set.is_match("tests/foo.rs"));
968 assert!(set.is_match("src/foo.rs"));
969 assert!(set.is_match("src/grep/src/main.rs"));
970
971 let matches = set.matches("src/lib.rs");
972 assert_eq!(2, matches.len());
973 assert_eq!(0, matches[0]);
974 assert_eq!(2, matches[1]);
975 }
976
977 #[test]
978 fn empty_set_works() {
979 let set = GlobSetBuilder::new().build().unwrap();
980 assert!(!set.is_match(""));
981 assert!(!set.is_match("a"));
982 }
983
984 #[test]
985 fn default_set_is_empty_works() {
986 let set: GlobSet = Default::default();
987 assert!(!set.is_match(""));
988 assert!(!set.is_match("a"));
989 }
990
991 #[test]
992 fn escape() {
993 use super::escape;
994 assert_eq!("foo", escape("foo"));
995 assert_eq!("foo[*]", escape("foo*"));
996 assert_eq!("[[][]]", escape("[]"));
997 assert_eq!("[*][?]", escape("*?"));
998 assert_eq!("src/[*][*]/[*].rs", escape("src/**/*.rs"));
999 assert_eq!("bar[[]ab[]]baz", escape("bar[ab]baz"));
1000 assert_eq!("bar[[]!![]]!baz", escape("bar[!!]!baz"));
1001 }
1002
1003 // This tests that regex matching doesn't "remember" the results of
1004 // previous searches. That is, if any memory is reused from a previous
1005 // search, then it should be cleared first.
1006 #[test]
1007 fn set_does_not_remember() {
1008 let mut builder = GlobSetBuilder::new();
1009 builder.add(Glob::new("*foo*").unwrap());
1010 builder.add(Glob::new("*bar*").unwrap());
1011 builder.add(Glob::new("*quux*").unwrap());
1012 let set = builder.build().unwrap();
1013
1014 let matches = set.matches("ZfooZquuxZ");
1015 assert_eq!(2, matches.len());
1016 assert_eq!(0, matches[0]);
1017 assert_eq!(2, matches[1]);
1018
1019 let matches = set.matches("nada");
1020 assert_eq!(0, matches.len());
1021 }
1022}
1023