1/*!
2The globset crate provides cross platform single glob and glob set matching.
3
4Glob set matching is the process of matching one or more glob patterns against
5a single candidate path simultaneously, and returning all of the globs that
6matched. For example, given this set of globs:
7
8```ignore
9*.rs
10src/lib.rs
11src/**/foo.rs
12```
13
14and a path `src/bar/baz/foo.rs`, then the set would report the first and third
15globs as matching.
16
17# Example: one glob
18
19This example shows how to match a single glob against a single file path.
20
21```
22# fn example() -> Result<(), globset::Error> {
23use globset::Glob;
24
25let glob = Glob::new("*.rs")?.compile_matcher();
26
27assert!(glob.is_match("foo.rs"));
28assert!(glob.is_match("foo/bar.rs"));
29assert!(!glob.is_match("Cargo.toml"));
30# Ok(()) } example().unwrap();
31```
32
33# Example: configuring a glob matcher
34
35This example shows how to use a `GlobBuilder` to configure aspects of match
36semantics. In this example, we prevent wildcards from matching path separators.
37
38```
39# fn example() -> Result<(), globset::Error> {
40use globset::GlobBuilder;
41
42let glob = GlobBuilder::new("*.rs")
43 .literal_separator(true).build()?.compile_matcher();
44
45assert!(glob.is_match("foo.rs"));
46assert!(!glob.is_match("foo/bar.rs")); // no longer matches
47assert!(!glob.is_match("Cargo.toml"));
48# Ok(()) } example().unwrap();
49```
50
51# Example: match multiple globs at once
52
53This example shows how to match multiple glob patterns at once.
54
55```
56# fn example() -> Result<(), globset::Error> {
57use globset::{Glob, GlobSetBuilder};
58
59let mut builder = GlobSetBuilder::new();
60// A GlobBuilder can be used to configure each glob's match semantics
61// independently.
62builder.add(Glob::new("*.rs")?);
63builder.add(Glob::new("src/lib.rs")?);
64builder.add(Glob::new("src/**/foo.rs")?);
65let set = builder.build()?;
66
67assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
68# Ok(()) } example().unwrap();
69```
70
71# Syntax
72
73Standard Unix-style glob syntax is supported:
74
75* `?` matches any single character. (If the `literal_separator` option is
76 enabled, then `?` can never match a path separator.)
77* `*` matches zero or more characters. (If the `literal_separator` option is
78 enabled, then `*` can never match a path separator.)
* `**` recursively matches directories, but only in the following three situations.
80 First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
81 all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
82 and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
83 <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
84 <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
85 Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
  the pattern, then it matches zero or more directories. Using `**` anywhere
  else is not an error; it is treated as two consecutive `*` patterns (N.B.
  the glob `**` on its own is allowed and means "match everything").
88* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
89 (N.B. Nesting `{...}` is not currently allowed.)
90* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
91 `[!ab]` to match any character except for `a` and `b`.
92* Metacharacters such as `*` and `?` can be escaped with character class
93 notation. e.g., `[*]` matches `*`.
94* When backslash escapes are enabled, a backslash (`\`) will escape all meta
95 characters in a glob. If it precedes a non-meta character, then the slash is
96 ignored. A `\\` will match a literal `\\`. Note that this mode is only
97 enabled on Unix platforms by default, but can be enabled on any platform
98 via the `backslash_escape` setting on `Glob`.
99
100A `GlobBuilder` can be used to prevent wildcards from matching path separators,
101or to enable case insensitive matching.
102*/
103
104#![deny(missing_docs)]
105
106use std::borrow::Cow;
107use std::collections::{BTreeMap, HashMap};
108use std::error::Error as StdError;
109use std::fmt;
110use std::hash;
111use std::path::Path;
112use std::str;
113
114use aho_corasick::AhoCorasick;
115use bstr::{ByteSlice, ByteVec, B};
116use regex::bytes::{Regex, RegexBuilder, RegexSet};
117
118use crate::glob::MatchStrategy;
119pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
120use crate::pathutil::{file_name, file_name_ext, normalize_path};
121
122mod glob;
123mod pathutil;
124
125#[cfg(feature = "serde1")]
126mod serde_impl;
127
// With the `log` feature enabled, `debug!` forwards all of its tokens to
// `log::debug!`.
#[cfg(feature = "log")]
macro_rules! debug {
    ($($arg:tt)*) => {
        ::log::debug!($($arg)*);
    };
}

// Without the `log` feature, `debug!` accepts the same tokens but expands to
// nothing, so call sites need no `cfg` of their own.
#[cfg(not(feature = "log"))]
macro_rules! debug {
    ($($arg:tt)*) => {};
}
137
138/// Represents an error that can occur when parsing a glob pattern.
139#[derive(Clone, Debug, Eq, PartialEq)]
140pub struct Error {
141 /// The original glob provided by the caller.
142 glob: Option<String>,
143 /// The kind of error.
144 kind: ErrorKind,
145}
146
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorKind {
    /// **DEPRECATED**.
    ///
    /// This error was once reported to stay consistent with git's glob
    /// specification. That specification now accepts every use of `**`:
    /// when `**` is not adjacent to a path separator and is not at the
    /// beginning/end of a glob, it is treated as two consecutive `*`
    /// patterns. This error is therefore no longer used.
    InvalidRecursive,
    /// A character class (e.g., `[abc]`) was opened but never closed.
    UnclosedClass,
    /// A range inside a character class (e.g., `[a-z]`) is invalid, for
    /// example because its start is a lexicographically larger character
    /// than its end. The two fields are the start and end of the range.
    InvalidRange(char, char),
    /// A `}` was found without a matching `{`.
    UnopenedAlternates,
    /// A `{` was found without a matching `}`.
    UnclosedAlternates,
    /// An alternating group was nested inside another alternating group,
    /// e.g., `{{a,b},{c,d}}`.
    NestedAlternates,
    /// An unescaped '\' was found at the end of a glob.
    DanglingEscape,
    /// An error associated with parsing or compiling a regex. The field is
    /// the error's message.
    Regex(String),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// New variants may be added to this enum in the future. This variant
    /// exists so that clients cannot rely on exhaustive matching, which
    /// would otherwise turn every new variant into a breaking change.
    #[doc(hidden)]
    __Nonexhaustive,
}
183
184impl StdError for Error {
185 fn description(&self) -> &str {
186 self.kind.description()
187 }
188}
189
190impl Error {
191 /// Return the glob that caused this error, if one exists.
192 pub fn glob(&self) -> Option<&str> {
193 self.glob.as_ref().map(|s: &String| &**s)
194 }
195
196 /// Return the kind of this error.
197 pub fn kind(&self) -> &ErrorKind {
198 &self.kind
199 }
200}
201
202impl ErrorKind {
203 fn description(&self) -> &str {
204 match *self {
205 ErrorKind::InvalidRecursive => {
206 "invalid use of **; must be one path component"
207 }
208 ErrorKind::UnclosedClass => {
209 "unclosed character class; missing ']'"
210 }
211 ErrorKind::InvalidRange(_, _) => "invalid character range",
212 ErrorKind::UnopenedAlternates => {
213 "unopened alternate group; missing '{' \
214 (maybe escape '}' with '[}]'?)"
215 }
216 ErrorKind::UnclosedAlternates => {
217 "unclosed alternate group; missing '}' \
218 (maybe escape '{' with '[{]'?)"
219 }
220 ErrorKind::NestedAlternates => {
221 "nested alternate groups are not allowed"
222 }
223 ErrorKind::DanglingEscape => "dangling '\\'",
224 ErrorKind::Regex(ref err) => err,
225 ErrorKind::__Nonexhaustive => unreachable!(),
226 }
227 }
228}
229
230impl fmt::Display for Error {
231 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
232 match self.glob {
233 None => self.kind.fmt(f),
234 Some(ref glob: &String) => {
235 write!(f, "error parsing glob '{}': {}", glob, self.kind)
236 }
237 }
238 }
239}
240
241impl fmt::Display for ErrorKind {
242 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
243 match *self {
244 ErrorKind::InvalidRecursive
245 | ErrorKind::UnclosedClass
246 | ErrorKind::UnopenedAlternates
247 | ErrorKind::UnclosedAlternates
248 | ErrorKind::NestedAlternates
249 | ErrorKind::DanglingEscape
250 | ErrorKind::Regex(_) => write!(f, "{}", self.description()),
251 ErrorKind::InvalidRange(s: char, e: char) => {
252 write!(f, "invalid range; '{}' > '{}'", s, e)
253 }
254 ErrorKind::__Nonexhaustive => unreachable!(),
255 }
256 }
257}
258
259fn new_regex(pat: &str) -> Result<Regex, Error> {
260 RegexBuilder::new(pat)
261 .dot_matches_new_line(true)
262 .size_limit(10 * (1 << 20))
263 .dfa_size_limit(10 * (1 << 20))
264 .build()
265 .map_err(|err: Error| Error {
266 glob: Some(pat.to_string()),
267 kind: ErrorKind::Regex(err.to_string()),
268 })
269}
270
271fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
272where
273 S: AsRef<str>,
274 I: IntoIterator<Item = S>,
275{
276 RegexSet::new(pats).map_err(|err: Error| Error {
277 glob: None,
278 kind: ErrorKind::Regex(err.to_string()),
279 })
280}
281
282type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
283
284/// GlobSet represents a group of globs that can be matched together in a
285/// single pass.
286#[derive(Clone, Debug)]
287pub struct GlobSet {
288 len: usize,
289 strats: Vec<GlobSetMatchStrategy>,
290}
291
292impl GlobSet {
293 /// Create an empty `GlobSet`. An empty set matches nothing.
294 #[inline]
295 pub fn empty() -> GlobSet {
296 GlobSet { len: 0, strats: vec![] }
297 }
298
299 /// Returns true if this set is empty, and therefore matches nothing.
300 #[inline]
301 pub fn is_empty(&self) -> bool {
302 self.len == 0
303 }
304
305 /// Returns the number of globs in this set.
306 #[inline]
307 pub fn len(&self) -> usize {
308 self.len
309 }
310
311 /// Returns true if any glob in this set matches the path given.
312 pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
313 self.is_match_candidate(&Candidate::new(path.as_ref()))
314 }
315
316 /// Returns true if any glob in this set matches the path given.
317 ///
318 /// This takes a Candidate as input, which can be used to amortize the
319 /// cost of preparing a path for matching.
320 pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
321 if self.is_empty() {
322 return false;
323 }
324 for strat in &self.strats {
325 if strat.is_match(path) {
326 return true;
327 }
328 }
329 false
330 }
331
332 /// Returns the sequence number of every glob pattern that matches the
333 /// given path.
334 pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
335 self.matches_candidate(&Candidate::new(path.as_ref()))
336 }
337
338 /// Returns the sequence number of every glob pattern that matches the
339 /// given path.
340 ///
341 /// This takes a Candidate as input, which can be used to amortize the
342 /// cost of preparing a path for matching.
343 pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
344 let mut into = vec![];
345 if self.is_empty() {
346 return into;
347 }
348 self.matches_candidate_into(path, &mut into);
349 into
350 }
351
352 /// Adds the sequence number of every glob pattern that matches the given
353 /// path to the vec given.
354 ///
355 /// `into` is cleared before matching begins, and contains the set of
356 /// sequence numbers (in ascending order) after matching ends. If no globs
357 /// were matched, then `into` will be empty.
358 pub fn matches_into<P: AsRef<Path>>(
359 &self,
360 path: P,
361 into: &mut Vec<usize>,
362 ) {
363 self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
364 }
365
366 /// Adds the sequence number of every glob pattern that matches the given
367 /// path to the vec given.
368 ///
369 /// `into` is cleared before matching begins, and contains the set of
370 /// sequence numbers (in ascending order) after matching ends. If no globs
371 /// were matched, then `into` will be empty.
372 ///
373 /// This takes a Candidate as input, which can be used to amortize the
374 /// cost of preparing a path for matching.
375 pub fn matches_candidate_into(
376 &self,
377 path: &Candidate<'_>,
378 into: &mut Vec<usize>,
379 ) {
380 into.clear();
381 if self.is_empty() {
382 return;
383 }
384 for strat in &self.strats {
385 strat.matches_into(path, into);
386 }
387 into.sort();
388 into.dedup();
389 }
390
391 fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
392 if pats.is_empty() {
393 return Ok(GlobSet { len: 0, strats: vec![] });
394 }
395 let mut lits = LiteralStrategy::new();
396 let mut base_lits = BasenameLiteralStrategy::new();
397 let mut exts = ExtensionStrategy::new();
398 let mut prefixes = MultiStrategyBuilder::new();
399 let mut suffixes = MultiStrategyBuilder::new();
400 let mut required_exts = RequiredExtensionStrategyBuilder::new();
401 let mut regexes = MultiStrategyBuilder::new();
402 for (i, p) in pats.iter().enumerate() {
403 match MatchStrategy::new(p) {
404 MatchStrategy::Literal(lit) => {
405 lits.add(i, lit);
406 }
407 MatchStrategy::BasenameLiteral(lit) => {
408 base_lits.add(i, lit);
409 }
410 MatchStrategy::Extension(ext) => {
411 exts.add(i, ext);
412 }
413 MatchStrategy::Prefix(prefix) => {
414 prefixes.add(i, prefix);
415 }
416 MatchStrategy::Suffix { suffix, component } => {
417 if component {
418 lits.add(i, suffix[1..].to_string());
419 }
420 suffixes.add(i, suffix);
421 }
422 MatchStrategy::RequiredExtension(ext) => {
423 required_exts.add(i, ext, p.regex().to_owned());
424 }
425 MatchStrategy::Regex => {
426 debug!("glob converted to regex: {:?}", p);
427 regexes.add(i, p.regex().to_owned());
428 }
429 }
430 }
431 debug!(
432 "built glob set; {} literals, {} basenames, {} extensions, \
433 {} prefixes, {} suffixes, {} required extensions, {} regexes",
434 lits.0.len(),
435 base_lits.0.len(),
436 exts.0.len(),
437 prefixes.literals.len(),
438 suffixes.literals.len(),
439 required_exts.0.len(),
440 regexes.literals.len()
441 );
442 Ok(GlobSet {
443 len: pats.len(),
444 strats: vec![
445 GlobSetMatchStrategy::Extension(exts),
446 GlobSetMatchStrategy::BasenameLiteral(base_lits),
447 GlobSetMatchStrategy::Literal(lits),
448 GlobSetMatchStrategy::Suffix(suffixes.suffix()),
449 GlobSetMatchStrategy::Prefix(prefixes.prefix()),
450 GlobSetMatchStrategy::RequiredExtension(
451 required_exts.build()?,
452 ),
453 GlobSetMatchStrategy::Regex(regexes.regex_set()?),
454 ],
455 })
456 }
457}
458
459impl Default for GlobSet {
460 /// Create a default empty GlobSet.
461 fn default() -> Self {
462 GlobSet::empty()
463 }
464}
465
466/// GlobSetBuilder builds a group of patterns that can be used to
467/// simultaneously match a file path.
468#[derive(Clone, Debug)]
469pub struct GlobSetBuilder {
470 pats: Vec<Glob>,
471}
472
473impl GlobSetBuilder {
474 /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
475 /// patterns. Once all patterns have been added, `build` should be called
476 /// to produce a `GlobSet`, which can then be used for matching.
477 pub fn new() -> GlobSetBuilder {
478 GlobSetBuilder { pats: vec![] }
479 }
480
481 /// Builds a new matcher from all of the glob patterns added so far.
482 ///
483 /// Once a matcher is built, no new patterns can be added to it.
484 pub fn build(&self) -> Result<GlobSet, Error> {
485 GlobSet::new(&self.pats)
486 }
487
488 /// Add a new pattern to this set.
489 pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
490 self.pats.push(pat);
491 self
492 }
493}
494
495/// A candidate path for matching.
496///
497/// All glob matching in this crate operates on `Candidate` values.
498/// Constructing candidates has a very small cost associated with it, so
499/// callers may find it beneficial to amortize that cost when matching a single
500/// path against multiple globs or sets of globs.
501#[derive(Clone, Debug)]
502pub struct Candidate<'a> {
503 path: Cow<'a, [u8]>,
504 basename: Cow<'a, [u8]>,
505 ext: Cow<'a, [u8]>,
506}
507
508impl<'a> Candidate<'a> {
509 /// Create a new candidate for matching from the given path.
510 pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
511 let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
512 let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
513 let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
514 Candidate { path: path, basename: basename, ext: ext }
515 }
516
517 fn path_prefix(&self, max: usize) -> &[u8] {
518 if self.path.len() <= max {
519 &*self.path
520 } else {
521 &self.path[..max]
522 }
523 }
524
525 fn path_suffix(&self, max: usize) -> &[u8] {
526 if self.path.len() <= max {
527 &*self.path
528 } else {
529 &self.path[self.path.len() - max..]
530 }
531 }
532}
533
534#[derive(Clone, Debug)]
535enum GlobSetMatchStrategy {
536 Literal(LiteralStrategy),
537 BasenameLiteral(BasenameLiteralStrategy),
538 Extension(ExtensionStrategy),
539 Prefix(PrefixStrategy),
540 Suffix(SuffixStrategy),
541 RequiredExtension(RequiredExtensionStrategy),
542 Regex(RegexSetStrategy),
543}
544
545impl GlobSetMatchStrategy {
546 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
547 use self::GlobSetMatchStrategy::*;
548 match *self {
549 Literal(ref s) => s.is_match(candidate),
550 BasenameLiteral(ref s) => s.is_match(candidate),
551 Extension(ref s) => s.is_match(candidate),
552 Prefix(ref s) => s.is_match(candidate),
553 Suffix(ref s) => s.is_match(candidate),
554 RequiredExtension(ref s) => s.is_match(candidate),
555 Regex(ref s) => s.is_match(candidate),
556 }
557 }
558
559 fn matches_into(
560 &self,
561 candidate: &Candidate<'_>,
562 matches: &mut Vec<usize>,
563 ) {
564 use self::GlobSetMatchStrategy::*;
565 match *self {
566 Literal(ref s) => s.matches_into(candidate, matches),
567 BasenameLiteral(ref s) => s.matches_into(candidate, matches),
568 Extension(ref s) => s.matches_into(candidate, matches),
569 Prefix(ref s) => s.matches_into(candidate, matches),
570 Suffix(ref s) => s.matches_into(candidate, matches),
571 RequiredExtension(ref s) => s.matches_into(candidate, matches),
572 Regex(ref s) => s.matches_into(candidate, matches),
573 }
574 }
575}
576
577#[derive(Clone, Debug)]
578struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
579
580impl LiteralStrategy {
581 fn new() -> LiteralStrategy {
582 LiteralStrategy(BTreeMap::new())
583 }
584
585 fn add(&mut self, global_index: usize, lit: String) {
586 self.0.entry(lit.into_bytes()).or_insert(default:vec![]).push(global_index);
587 }
588
589 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
590 self.0.contains_key(candidate.path.as_bytes())
591 }
592
593 #[inline(never)]
594 fn matches_into(
595 &self,
596 candidate: &Candidate<'_>,
597 matches: &mut Vec<usize>,
598 ) {
599 if let Some(hits: &Vec) = self.0.get(key:candidate.path.as_bytes()) {
600 matches.extend(iter:hits);
601 }
602 }
603}
604
605#[derive(Clone, Debug)]
606struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
607
608impl BasenameLiteralStrategy {
609 fn new() -> BasenameLiteralStrategy {
610 BasenameLiteralStrategy(BTreeMap::new())
611 }
612
613 fn add(&mut self, global_index: usize, lit: String) {
614 self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
615 }
616
617 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
618 if candidate.basename.is_empty() {
619 return false;
620 }
621 self.0.contains_key(candidate.basename.as_bytes())
622 }
623
624 #[inline(never)]
625 fn matches_into(
626 &self,
627 candidate: &Candidate<'_>,
628 matches: &mut Vec<usize>,
629 ) {
630 if candidate.basename.is_empty() {
631 return;
632 }
633 if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
634 matches.extend(hits);
635 }
636 }
637}
638
639#[derive(Clone, Debug)]
640struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
641
642impl ExtensionStrategy {
643 fn new() -> ExtensionStrategy {
644 ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
645 }
646
647 fn add(&mut self, global_index: usize, ext: String) {
648 self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
649 }
650
651 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
652 if candidate.ext.is_empty() {
653 return false;
654 }
655 self.0.contains_key(candidate.ext.as_bytes())
656 }
657
658 #[inline(never)]
659 fn matches_into(
660 &self,
661 candidate: &Candidate<'_>,
662 matches: &mut Vec<usize>,
663 ) {
664 if candidate.ext.is_empty() {
665 return;
666 }
667 if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
668 matches.extend(hits);
669 }
670 }
671}
672
673#[derive(Clone, Debug)]
674struct PrefixStrategy {
675 matcher: AhoCorasick,
676 map: Vec<usize>,
677 longest: usize,
678}
679
680impl PrefixStrategy {
681 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
682 let path: &[u8] = candidate.path_prefix(self.longest);
683 for m: Match in self.matcher.find_overlapping_iter(haystack:path) {
684 if m.start() == 0 {
685 return true;
686 }
687 }
688 false
689 }
690
691 fn matches_into(
692 &self,
693 candidate: &Candidate<'_>,
694 matches: &mut Vec<usize>,
695 ) {
696 let path: &[u8] = candidate.path_prefix(self.longest);
697 for m: Match in self.matcher.find_overlapping_iter(haystack:path) {
698 if m.start() == 0 {
699 matches.push(self.map[m.pattern()]);
700 }
701 }
702 }
703}
704
705#[derive(Clone, Debug)]
706struct SuffixStrategy {
707 matcher: AhoCorasick,
708 map: Vec<usize>,
709 longest: usize,
710}
711
712impl SuffixStrategy {
713 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
714 let path: &[u8] = candidate.path_suffix(self.longest);
715 for m: Match in self.matcher.find_overlapping_iter(haystack:path) {
716 if m.end() == path.len() {
717 return true;
718 }
719 }
720 false
721 }
722
723 fn matches_into(
724 &self,
725 candidate: &Candidate<'_>,
726 matches: &mut Vec<usize>,
727 ) {
728 let path: &[u8] = candidate.path_suffix(self.longest);
729 for m: Match in self.matcher.find_overlapping_iter(haystack:path) {
730 if m.end() == path.len() {
731 matches.push(self.map[m.pattern()]);
732 }
733 }
734 }
735}
736
737#[derive(Clone, Debug)]
738struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
739
740impl RequiredExtensionStrategy {
741 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
742 if candidate.ext.is_empty() {
743 return false;
744 }
745 match self.0.get(candidate.ext.as_bytes()) {
746 None => false,
747 Some(regexes) => {
748 for &(_, ref re) in regexes {
749 if re.is_match(candidate.path.as_bytes()) {
750 return true;
751 }
752 }
753 false
754 }
755 }
756 }
757
758 #[inline(never)]
759 fn matches_into(
760 &self,
761 candidate: &Candidate<'_>,
762 matches: &mut Vec<usize>,
763 ) {
764 if candidate.ext.is_empty() {
765 return;
766 }
767 if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
768 for &(global_index, ref re) in regexes {
769 if re.is_match(candidate.path.as_bytes()) {
770 matches.push(global_index);
771 }
772 }
773 }
774 }
775}
776
777#[derive(Clone, Debug)]
778struct RegexSetStrategy {
779 matcher: RegexSet,
780 map: Vec<usize>,
781}
782
783impl RegexSetStrategy {
784 fn is_match(&self, candidate: &Candidate<'_>) -> bool {
785 self.matcher.is_match(text:candidate.path.as_bytes())
786 }
787
788 fn matches_into(
789 &self,
790 candidate: &Candidate<'_>,
791 matches: &mut Vec<usize>,
792 ) {
793 for i: usize in self.matcher.matches(text:candidate.path.as_bytes()) {
794 matches.push(self.map[i]);
795 }
796 }
797}
798
799#[derive(Clone, Debug)]
800struct MultiStrategyBuilder {
801 literals: Vec<String>,
802 map: Vec<usize>,
803 longest: usize,
804}
805
806impl MultiStrategyBuilder {
807 fn new() -> MultiStrategyBuilder {
808 MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
809 }
810
811 fn add(&mut self, global_index: usize, literal: String) {
812 if literal.len() > self.longest {
813 self.longest = literal.len();
814 }
815 self.map.push(global_index);
816 self.literals.push(literal);
817 }
818
819 fn prefix(self) -> PrefixStrategy {
820 PrefixStrategy {
821 matcher: AhoCorasick::new_auto_configured(&self.literals),
822 map: self.map,
823 longest: self.longest,
824 }
825 }
826
827 fn suffix(self) -> SuffixStrategy {
828 SuffixStrategy {
829 matcher: AhoCorasick::new_auto_configured(&self.literals),
830 map: self.map,
831 longest: self.longest,
832 }
833 }
834
835 fn regex_set(self) -> Result<RegexSetStrategy, Error> {
836 Ok(RegexSetStrategy {
837 matcher: new_regex_set(self.literals)?,
838 map: self.map,
839 })
840 }
841}
842
843#[derive(Clone, Debug)]
844struct RequiredExtensionStrategyBuilder(
845 HashMap<Vec<u8>, Vec<(usize, String)>>,
846);
847
848impl RequiredExtensionStrategyBuilder {
849 fn new() -> RequiredExtensionStrategyBuilder {
850 RequiredExtensionStrategyBuilder(HashMap::new())
851 }
852
853 fn add(&mut self, global_index: usize, ext: String, regex: String) {
854 self.0
855 .entry(ext.into_bytes())
856 .or_insert(default:vec![])
857 .push((global_index, regex));
858 }
859
860 fn build(self) -> Result<RequiredExtensionStrategy, Error> {
861 let mut exts: HashMap, Vec<(usize, …)>, …> = HashMap::with_hasher(hash_builder:Fnv::default());
862 for (ext: Vec, regexes: Vec<(usize, String)>) in self.0.into_iter() {
863 exts.insert(k:ext.clone(), v:vec![]);
864 for (global_index: usize, regex: String) in regexes {
865 let compiled: Regex = new_regex(&regex)?;
866 exts.get_mut(&ext).unwrap().push((global_index, compiled));
867 }
868 }
869 Ok(RequiredExtensionStrategy(exts))
870 }
871}
872
#[cfg(test)]
mod tests {
    use super::{GlobSet, GlobSetBuilder};
    use crate::glob::Glob;

    // A set of three globs matches the expected paths, and `matches` reports
    // the sequence numbers of the matching globs in ascending order.
    #[test]
    fn set_works() {
        let mut builder = GlobSetBuilder::new();
        for &pattern in ["src/**/*.rs", "*.c", "src/lib.rs"].iter() {
            builder.add(Glob::new(pattern).unwrap());
        }
        let set = builder.build().unwrap();

        let matching =
            ["foo.c", "src/foo.c", "src/foo.rs", "src/grep/src/main.rs"];
        for &path in matching.iter() {
            assert!(set.is_match(path));
        }
        for &path in ["foo.rs", "tests/foo.rs"].iter() {
            assert!(!set.is_match(path));
        }

        assert_eq!(set.matches("src/lib.rs"), vec![0, 2]);
    }

    // A set built from no globs matches nothing.
    #[test]
    fn empty_set_works() {
        let set = GlobSetBuilder::new().build().unwrap();
        for &path in ["", "a"].iter() {
            assert!(!set.is_match(path));
        }
    }

    // The default set matches nothing either.
    #[test]
    fn default_set_is_empty_works() {
        let set: GlobSet = Default::default();
        for &path in ["", "a"].iter() {
            assert!(!set.is_match(path));
        }
    }
}
913