1 | /*! |
2 | The globset crate provides cross platform single glob and glob set matching. |
3 | |
4 | Glob set matching is the process of matching one or more glob patterns against |
5 | a single candidate path simultaneously, and returning all of the globs that |
6 | matched. For example, given this set of globs: |
7 | |
8 | * `*.rs` |
9 | * `src/lib.rs` |
10 | * `src/**/foo.rs` |
11 | |
12 | and a path `src/bar/baz/foo.rs`, then the set would report the first and third |
13 | globs as matching. |
14 | |
15 | # Example: one glob |
16 | |
17 | This example shows how to match a single glob against a single file path. |
18 | |
19 | ``` |
20 | use globset::Glob; |
21 | |
22 | let glob = Glob::new("*.rs" )?.compile_matcher(); |
23 | |
24 | assert!(glob.is_match("foo.rs" )); |
25 | assert!(glob.is_match("foo/bar.rs" )); |
26 | assert!(!glob.is_match("Cargo.toml" )); |
27 | # Ok::<(), Box<dyn std::error::Error>>(()) |
28 | ``` |
29 | |
30 | # Example: configuring a glob matcher |
31 | |
32 | This example shows how to use a `GlobBuilder` to configure aspects of match |
33 | semantics. In this example, we prevent wildcards from matching path separators. |
34 | |
35 | ``` |
36 | use globset::GlobBuilder; |
37 | |
38 | let glob = GlobBuilder::new("*.rs" ) |
39 | .literal_separator(true).build()?.compile_matcher(); |
40 | |
41 | assert!(glob.is_match("foo.rs" )); |
42 | assert!(!glob.is_match("foo/bar.rs" )); // no longer matches |
43 | assert!(!glob.is_match("Cargo.toml" )); |
44 | # Ok::<(), Box<dyn std::error::Error>>(()) |
45 | ``` |
46 | |
47 | # Example: match multiple globs at once |
48 | |
49 | This example shows how to match multiple glob patterns at once. |
50 | |
51 | ``` |
52 | use globset::{Glob, GlobSetBuilder}; |
53 | |
54 | let mut builder = GlobSetBuilder::new(); |
55 | // A GlobBuilder can be used to configure each glob's match semantics |
56 | // independently. |
57 | builder.add(Glob::new("*.rs" )?); |
58 | builder.add(Glob::new("src/lib.rs" )?); |
59 | builder.add(Glob::new("src/**/foo.rs" )?); |
60 | let set = builder.build()?; |
61 | |
62 | assert_eq!(set.matches("src/bar/baz/foo.rs" ), vec![0, 2]); |
63 | # Ok::<(), Box<dyn std::error::Error>>(()) |
64 | ``` |
65 | |
66 | # Syntax |
67 | |
68 | Standard Unix-style glob syntax is supported: |
69 | |
70 | * `?` matches any single character. (If the `literal_separator` option is |
71 | enabled, then `?` can never match a path separator.) |
72 | * `*` matches zero or more characters. (If the `literal_separator` option is |
73 | enabled, then `*` can never match a path separator.) |
* `**` recursively matches directories but is only legal in three situations.
75 | First, if the glob starts with <code>\*\*/</code>, then it matches |
76 | all directories. For example, <code>\*\*/foo</code> matches `foo` |
77 | and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with |
78 | <code>/\*\*</code>, then it matches all sub-entries. For example, |
79 | <code>foo/\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`. |
80 | Thirdly, if the glob contains <code>/\*\*/</code> anywhere within |
81 | the pattern, then it matches zero or more directories. Using `**` anywhere |
82 | else is illegal (N.B. the glob `**` is allowed and means "match everything"). |
83 | * `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns. |
84 | (N.B. Nesting `{...}` is not currently allowed.) |
85 | * `[ab]` matches `a` or `b` where `a` and `b` are characters. Use |
86 | `[!ab]` to match any character except for `a` and `b`. |
87 | * Metacharacters such as `*` and `?` can be escaped with character class |
88 | notation. e.g., `[*]` matches `*`. |
* When backslash escapes are enabled, a backslash (`\`) will escape all meta
  characters in a glob. If it precedes a non-meta character, then the
  backslash is ignored. A `\\` will match a literal `\\`. Note that this mode
  is only enabled on Unix platforms by default, but can be enabled on any
  platform via the `backslash_escape` setting on `Glob`.
94 | |
95 | A `GlobBuilder` can be used to prevent wildcards from matching path separators, |
96 | or to enable case insensitive matching. |
97 | */ |
98 | |
99 | #![deny (missing_docs)] |
100 | |
101 | use std::{ |
102 | borrow::Cow, |
103 | panic::{RefUnwindSafe, UnwindSafe}, |
104 | path::Path, |
105 | sync::Arc, |
106 | }; |
107 | |
108 | use { |
109 | aho_corasick::AhoCorasick, |
110 | bstr::{ByteSlice, ByteVec, B}, |
111 | regex_automata::{ |
112 | meta::Regex, |
113 | util::pool::{Pool, PoolGuard}, |
114 | PatternSet, |
115 | }, |
116 | }; |
117 | |
118 | use crate::{ |
119 | glob::MatchStrategy, |
120 | pathutil::{file_name, file_name_ext, normalize_path}, |
121 | }; |
122 | |
123 | pub use crate::glob::{Glob, GlobBuilder, GlobMatcher}; |
124 | |
125 | mod fnv; |
126 | mod glob; |
127 | mod pathutil; |
128 | |
129 | #[cfg (feature = "serde1" )] |
130 | mod serde_impl; |
131 | |
// With the `log` feature enabled, `debug!` forwards all of its tokens to
// `log::debug!` unchanged.
#[cfg(feature = "log")]
macro_rules! debug {
    ($($arg:tt)*) => {
        ::log::debug!($($arg)*);
    };
}

// Without the `log` feature, `debug!` accepts any tokens and expands to
// nothing, so call sites compile away entirely.
#[cfg(not(feature = "log"))]
macro_rules! debug {
    ($($arg:tt)*) => {};
}
141 | |
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
    /// The original glob provided by the caller.
    glob: Option<String>,
    /// The kind of error.
    kind: ErrorKind,
}

/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// **DEPRECATED**.
    ///
    /// This error used to occur for consistency with git's glob specification,
    /// but the specification now accepts all uses of `**`. When `**` does not
    /// appear adjacent to a path separator or at the beginning/end of a glob,
    /// it is now treated as two consecutive `*` patterns. As such, this error
    /// is no longer used.
    InvalidRecursive,
    /// Occurs when a character class (e.g., `[abc]`) is not closed.
    UnclosedClass,
    /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For
    /// example, if the range starts with a lexicographically larger character
    /// than it ends with.
    InvalidRange(char, char),
    /// Occurs when a `}` is found without a matching `{`.
    UnopenedAlternates,
    /// Occurs when a `{` is found without a matching `}`.
    UnclosedAlternates,
    /// Occurs when an alternating group is nested inside another alternating
    /// group, e.g., `{{a,b},{c,d}}`.
    NestedAlternates,
    /// Occurs when an unescaped '\' is found at the end of a glob.
    DanglingEscape,
    /// An error associated with parsing or compiling a regex.
    Regex(String),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// This enum may grow additional variants, so this makes sure clients
    /// don't count on exhaustive matching. (Otherwise, adding a new variant
    /// could break existing code.)
    #[doc(hidden)]
    __Nonexhaustive,
}

impl std::error::Error for Error {
    // Kept so that existing callers of `description()` keep receiving the
    // same text as `ErrorKind::description`.
    fn description(&self) -> &str {
        self.kind.description()
    }
}

impl Error {
    /// Return the glob that caused this error, if one exists.
    pub fn glob(&self) -> Option<&str> {
        self.glob.as_deref()
    }

    /// Return the kind of this error.
    pub fn kind(&self) -> &ErrorKind {
        &self.kind
    }
}

impl ErrorKind {
    /// Returns a short human readable message for this kind of error.
    ///
    /// For `Regex`, this is the message stored in the variant. The hidden
    /// `__Nonexhaustive` variant is never expected to be constructed, so
    /// asking for its description panics.
    fn description(&self) -> &str {
        match self {
            ErrorKind::InvalidRecursive => {
                "invalid use of **; must be one path component"
            }
            ErrorKind::UnclosedClass => {
                "unclosed character class; missing ']'"
            }
            ErrorKind::InvalidRange(_, _) => "invalid character range",
            ErrorKind::UnopenedAlternates => {
                "unopened alternate group; missing '{' \
                 (maybe escape '}' with '[}]'?)"
            }
            ErrorKind::UnclosedAlternates => {
                "unclosed alternate group; missing '}' \
                 (maybe escape '{' with '[{]'?)"
            }
            ErrorKind::NestedAlternates => {
                "nested alternate groups are not allowed"
            }
            ErrorKind::DanglingEscape => "dangling '\\'",
            ErrorKind::Regex(err) => err.as_str(),
            ErrorKind::__Nonexhaustive => unreachable!(),
        }
    }
}

impl std::fmt::Display for Error {
    /// Writes the error kind, prefixed with the offending glob when one was
    /// recorded.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.glob {
            Some(glob) => {
                write!(f, "error parsing glob '{}': {}", glob, self.kind)
            }
            // Delegate directly so that any formatter flags reach the kind.
            None => std::fmt::Display::fmt(&self.kind, f),
        }
    }
}

impl std::fmt::Display for ErrorKind {
    /// Writes the description of this kind. `InvalidRange` is special cased
    /// so that the two offending characters appear in the message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ErrorKind::InvalidRange(start, end) => {
                write!(f, "invalid range; '{}' > '{}'", start, end)
            }
            ErrorKind::InvalidRecursive
            | ErrorKind::UnclosedClass
            | ErrorKind::UnopenedAlternates
            | ErrorKind::UnclosedAlternates
            | ErrorKind::NestedAlternates
            | ErrorKind::DanglingEscape
            | ErrorKind::Regex(_) => write!(f, "{}", self.description()),
            ErrorKind::__Nonexhaustive => unreachable!(),
        }
    }
}
262 | |
263 | fn new_regex(pat: &str) -> Result<Regex, Error> { |
264 | let syntax: Config = regex_automata::util::syntax::Config::new() |
265 | .utf8(false) |
266 | .dot_matches_new_line(yes:true); |
267 | let config: Config = Regex::config() |
268 | .utf8_empty(false) |
269 | .nfa_size_limit(Some(10 * (1 << 20))) |
270 | .hybrid_cache_capacity(limit:10 * (1 << 20)); |
271 | Regex::builder().syntax(syntax).configure(config).build(pat).map_err( |
272 | |err: BuildError| Error { |
273 | glob: Some(pat.to_string()), |
274 | kind: ErrorKind::Regex(err.to_string()), |
275 | }, |
276 | ) |
277 | } |
278 | |
279 | fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> { |
280 | let syntax: Config = regex_automata::util::syntax::Config::new() |
281 | .utf8(false) |
282 | .dot_matches_new_line(yes:true); |
283 | let config: Config = Regex::config() |
284 | .match_kind(regex_automata::MatchKind::All) |
285 | .utf8_empty(false) |
286 | .nfa_size_limit(Some(10 * (1 << 20))) |
287 | .hybrid_cache_capacity(limit:10 * (1 << 20)); |
288 | Regex::builder() |
289 | .syntax(syntax) |
290 | .configure(config) |
291 | .build_many(&pats) |
292 | .map_err(|err: BuildError| Error { |
293 | glob: None, |
294 | kind: ErrorKind::Regex(err.to_string()), |
295 | }) |
296 | } |
297 | |
298 | /// GlobSet represents a group of globs that can be matched together in a |
299 | /// single pass. |
300 | #[derive (Clone, Debug)] |
301 | pub struct GlobSet { |
302 | len: usize, |
303 | strats: Vec<GlobSetMatchStrategy>, |
304 | } |
305 | |
306 | impl GlobSet { |
307 | /// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add |
308 | /// new patterns. Once all patterns have been added, `build` should be |
309 | /// called to produce a `GlobSet`, which can then be used for matching. |
310 | #[inline ] |
311 | pub fn builder() -> GlobSetBuilder { |
312 | GlobSetBuilder::new() |
313 | } |
314 | |
315 | /// Create an empty `GlobSet`. An empty set matches nothing. |
316 | #[inline ] |
317 | pub fn empty() -> GlobSet { |
318 | GlobSet { len: 0, strats: vec![] } |
319 | } |
320 | |
321 | /// Returns true if this set is empty, and therefore matches nothing. |
322 | #[inline ] |
323 | pub fn is_empty(&self) -> bool { |
324 | self.len == 0 |
325 | } |
326 | |
327 | /// Returns the number of globs in this set. |
328 | #[inline ] |
329 | pub fn len(&self) -> usize { |
330 | self.len |
331 | } |
332 | |
333 | /// Returns true if any glob in this set matches the path given. |
334 | pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool { |
335 | self.is_match_candidate(&Candidate::new(path.as_ref())) |
336 | } |
337 | |
338 | /// Returns true if any glob in this set matches the path given. |
339 | /// |
340 | /// This takes a Candidate as input, which can be used to amortize the |
341 | /// cost of preparing a path for matching. |
342 | pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool { |
343 | if self.is_empty() { |
344 | return false; |
345 | } |
346 | for strat in &self.strats { |
347 | if strat.is_match(path) { |
348 | return true; |
349 | } |
350 | } |
351 | false |
352 | } |
353 | |
354 | /// Returns the sequence number of every glob pattern that matches the |
355 | /// given path. |
356 | pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> { |
357 | self.matches_candidate(&Candidate::new(path.as_ref())) |
358 | } |
359 | |
360 | /// Returns the sequence number of every glob pattern that matches the |
361 | /// given path. |
362 | /// |
363 | /// This takes a Candidate as input, which can be used to amortize the |
364 | /// cost of preparing a path for matching. |
365 | pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> { |
366 | let mut into = vec![]; |
367 | if self.is_empty() { |
368 | return into; |
369 | } |
370 | self.matches_candidate_into(path, &mut into); |
371 | into |
372 | } |
373 | |
374 | /// Adds the sequence number of every glob pattern that matches the given |
375 | /// path to the vec given. |
376 | /// |
377 | /// `into` is cleared before matching begins, and contains the set of |
378 | /// sequence numbers (in ascending order) after matching ends. If no globs |
379 | /// were matched, then `into` will be empty. |
380 | pub fn matches_into<P: AsRef<Path>>( |
381 | &self, |
382 | path: P, |
383 | into: &mut Vec<usize>, |
384 | ) { |
385 | self.matches_candidate_into(&Candidate::new(path.as_ref()), into); |
386 | } |
387 | |
388 | /// Adds the sequence number of every glob pattern that matches the given |
389 | /// path to the vec given. |
390 | /// |
391 | /// `into` is cleared before matching begins, and contains the set of |
392 | /// sequence numbers (in ascending order) after matching ends. If no globs |
393 | /// were matched, then `into` will be empty. |
394 | /// |
395 | /// This takes a Candidate as input, which can be used to amortize the |
396 | /// cost of preparing a path for matching. |
397 | pub fn matches_candidate_into( |
398 | &self, |
399 | path: &Candidate<'_>, |
400 | into: &mut Vec<usize>, |
401 | ) { |
402 | into.clear(); |
403 | if self.is_empty() { |
404 | return; |
405 | } |
406 | for strat in &self.strats { |
407 | strat.matches_into(path, into); |
408 | } |
409 | into.sort(); |
410 | into.dedup(); |
411 | } |
412 | |
413 | fn new(pats: &[Glob]) -> Result<GlobSet, Error> { |
414 | if pats.is_empty() { |
415 | return Ok(GlobSet { len: 0, strats: vec![] }); |
416 | } |
417 | let mut lits = LiteralStrategy::new(); |
418 | let mut base_lits = BasenameLiteralStrategy::new(); |
419 | let mut exts = ExtensionStrategy::new(); |
420 | let mut prefixes = MultiStrategyBuilder::new(); |
421 | let mut suffixes = MultiStrategyBuilder::new(); |
422 | let mut required_exts = RequiredExtensionStrategyBuilder::new(); |
423 | let mut regexes = MultiStrategyBuilder::new(); |
424 | for (i, p) in pats.iter().enumerate() { |
425 | match MatchStrategy::new(p) { |
426 | MatchStrategy::Literal(lit) => { |
427 | lits.add(i, lit); |
428 | } |
429 | MatchStrategy::BasenameLiteral(lit) => { |
430 | base_lits.add(i, lit); |
431 | } |
432 | MatchStrategy::Extension(ext) => { |
433 | exts.add(i, ext); |
434 | } |
435 | MatchStrategy::Prefix(prefix) => { |
436 | prefixes.add(i, prefix); |
437 | } |
438 | MatchStrategy::Suffix { suffix, component } => { |
439 | if component { |
440 | lits.add(i, suffix[1..].to_string()); |
441 | } |
442 | suffixes.add(i, suffix); |
443 | } |
444 | MatchStrategy::RequiredExtension(ext) => { |
445 | required_exts.add(i, ext, p.regex().to_owned()); |
446 | } |
447 | MatchStrategy::Regex => { |
448 | debug!("glob converted to regex: {:?}" , p); |
449 | regexes.add(i, p.regex().to_owned()); |
450 | } |
451 | } |
452 | } |
453 | debug!( |
454 | "built glob set; {} literals, {} basenames, {} extensions, \ |
455 | {} prefixes, {} suffixes, {} required extensions, {} regexes" , |
456 | lits.0.len(), |
457 | base_lits.0.len(), |
458 | exts.0.len(), |
459 | prefixes.literals.len(), |
460 | suffixes.literals.len(), |
461 | required_exts.0.len(), |
462 | regexes.literals.len() |
463 | ); |
464 | Ok(GlobSet { |
465 | len: pats.len(), |
466 | strats: vec![ |
467 | GlobSetMatchStrategy::Extension(exts), |
468 | GlobSetMatchStrategy::BasenameLiteral(base_lits), |
469 | GlobSetMatchStrategy::Literal(lits), |
470 | GlobSetMatchStrategy::Suffix(suffixes.suffix()), |
471 | GlobSetMatchStrategy::Prefix(prefixes.prefix()), |
472 | GlobSetMatchStrategy::RequiredExtension( |
473 | required_exts.build()?, |
474 | ), |
475 | GlobSetMatchStrategy::Regex(regexes.regex_set()?), |
476 | ], |
477 | }) |
478 | } |
479 | } |
480 | |
481 | impl Default for GlobSet { |
482 | /// Create a default empty GlobSet. |
483 | fn default() -> Self { |
484 | GlobSet::empty() |
485 | } |
486 | } |
487 | |
488 | /// GlobSetBuilder builds a group of patterns that can be used to |
489 | /// simultaneously match a file path. |
490 | #[derive (Clone, Debug)] |
491 | pub struct GlobSetBuilder { |
492 | pats: Vec<Glob>, |
493 | } |
494 | |
495 | impl GlobSetBuilder { |
496 | /// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new |
497 | /// patterns. Once all patterns have been added, `build` should be called |
498 | /// to produce a [`GlobSet`], which can then be used for matching. |
499 | pub fn new() -> GlobSetBuilder { |
500 | GlobSetBuilder { pats: vec![] } |
501 | } |
502 | |
503 | /// Builds a new matcher from all of the glob patterns added so far. |
504 | /// |
505 | /// Once a matcher is built, no new patterns can be added to it. |
506 | pub fn build(&self) -> Result<GlobSet, Error> { |
507 | GlobSet::new(&self.pats) |
508 | } |
509 | |
510 | /// Add a new pattern to this set. |
511 | pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder { |
512 | self.pats.push(pat); |
513 | self |
514 | } |
515 | } |
516 | |
517 | /// A candidate path for matching. |
518 | /// |
519 | /// All glob matching in this crate operates on `Candidate` values. |
520 | /// Constructing candidates has a very small cost associated with it, so |
521 | /// callers may find it beneficial to amortize that cost when matching a single |
522 | /// path against multiple globs or sets of globs. |
523 | #[derive (Clone)] |
524 | pub struct Candidate<'a> { |
525 | path: Cow<'a, [u8]>, |
526 | basename: Cow<'a, [u8]>, |
527 | ext: Cow<'a, [u8]>, |
528 | } |
529 | |
530 | impl<'a> std::fmt::Debug for Candidate<'a> { |
531 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
532 | f&mut DebugStruct<'_, '_>.debug_struct("Candidate" ) |
533 | .field("path" , &self.path.as_bstr()) |
534 | .field("basename" , &self.basename.as_bstr()) |
535 | .field(name:"ext" , &self.ext.as_bstr()) |
536 | .finish() |
537 | } |
538 | } |
539 | |
540 | impl<'a> Candidate<'a> { |
541 | /// Create a new candidate for matching from the given path. |
542 | pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> { |
543 | let path = normalize_path(Vec::from_path_lossy(path.as_ref())); |
544 | let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("" ))); |
545 | let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("" ))); |
546 | Candidate { path, basename, ext } |
547 | } |
548 | |
549 | fn path_prefix(&self, max: usize) -> &[u8] { |
550 | if self.path.len() <= max { |
551 | &*self.path |
552 | } else { |
553 | &self.path[..max] |
554 | } |
555 | } |
556 | |
557 | fn path_suffix(&self, max: usize) -> &[u8] { |
558 | if self.path.len() <= max { |
559 | &*self.path |
560 | } else { |
561 | &self.path[self.path.len() - max..] |
562 | } |
563 | } |
564 | } |
565 | |
566 | #[derive (Clone, Debug)] |
567 | enum GlobSetMatchStrategy { |
568 | Literal(LiteralStrategy), |
569 | BasenameLiteral(BasenameLiteralStrategy), |
570 | Extension(ExtensionStrategy), |
571 | Prefix(PrefixStrategy), |
572 | Suffix(SuffixStrategy), |
573 | RequiredExtension(RequiredExtensionStrategy), |
574 | Regex(RegexSetStrategy), |
575 | } |
576 | |
577 | impl GlobSetMatchStrategy { |
578 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
579 | use self::GlobSetMatchStrategy::*; |
580 | match *self { |
581 | Literal(ref s) => s.is_match(candidate), |
582 | BasenameLiteral(ref s) => s.is_match(candidate), |
583 | Extension(ref s) => s.is_match(candidate), |
584 | Prefix(ref s) => s.is_match(candidate), |
585 | Suffix(ref s) => s.is_match(candidate), |
586 | RequiredExtension(ref s) => s.is_match(candidate), |
587 | Regex(ref s) => s.is_match(candidate), |
588 | } |
589 | } |
590 | |
591 | fn matches_into( |
592 | &self, |
593 | candidate: &Candidate<'_>, |
594 | matches: &mut Vec<usize>, |
595 | ) { |
596 | use self::GlobSetMatchStrategy::*; |
597 | match *self { |
598 | Literal(ref s) => s.matches_into(candidate, matches), |
599 | BasenameLiteral(ref s) => s.matches_into(candidate, matches), |
600 | Extension(ref s) => s.matches_into(candidate, matches), |
601 | Prefix(ref s) => s.matches_into(candidate, matches), |
602 | Suffix(ref s) => s.matches_into(candidate, matches), |
603 | RequiredExtension(ref s) => s.matches_into(candidate, matches), |
604 | Regex(ref s) => s.matches_into(candidate, matches), |
605 | } |
606 | } |
607 | } |
608 | |
609 | #[derive (Clone, Debug)] |
610 | struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>); |
611 | |
612 | impl LiteralStrategy { |
613 | fn new() -> LiteralStrategy { |
614 | LiteralStrategy(fnv::HashMap::default()) |
615 | } |
616 | |
617 | fn add(&mut self, global_index: usize, lit: String) { |
618 | self.0.entry(lit.into_bytes()).or_insert(default:vec![]).push(global_index); |
619 | } |
620 | |
621 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
622 | self.0.contains_key(candidate.path.as_bytes()) |
623 | } |
624 | |
625 | #[inline (never)] |
626 | fn matches_into( |
627 | &self, |
628 | candidate: &Candidate<'_>, |
629 | matches: &mut Vec<usize>, |
630 | ) { |
631 | if let Some(hits: &Vec) = self.0.get(candidate.path.as_bytes()) { |
632 | matches.extend(iter:hits); |
633 | } |
634 | } |
635 | } |
636 | |
637 | #[derive (Clone, Debug)] |
638 | struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>); |
639 | |
640 | impl BasenameLiteralStrategy { |
641 | fn new() -> BasenameLiteralStrategy { |
642 | BasenameLiteralStrategy(fnv::HashMap::default()) |
643 | } |
644 | |
645 | fn add(&mut self, global_index: usize, lit: String) { |
646 | self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); |
647 | } |
648 | |
649 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
650 | if candidate.basename.is_empty() { |
651 | return false; |
652 | } |
653 | self.0.contains_key(candidate.basename.as_bytes()) |
654 | } |
655 | |
656 | #[inline (never)] |
657 | fn matches_into( |
658 | &self, |
659 | candidate: &Candidate<'_>, |
660 | matches: &mut Vec<usize>, |
661 | ) { |
662 | if candidate.basename.is_empty() { |
663 | return; |
664 | } |
665 | if let Some(hits) = self.0.get(candidate.basename.as_bytes()) { |
666 | matches.extend(hits); |
667 | } |
668 | } |
669 | } |
670 | |
671 | #[derive (Clone, Debug)] |
672 | struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>); |
673 | |
674 | impl ExtensionStrategy { |
675 | fn new() -> ExtensionStrategy { |
676 | ExtensionStrategy(fnv::HashMap::default()) |
677 | } |
678 | |
679 | fn add(&mut self, global_index: usize, ext: String) { |
680 | self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index); |
681 | } |
682 | |
683 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
684 | if candidate.ext.is_empty() { |
685 | return false; |
686 | } |
687 | self.0.contains_key(candidate.ext.as_bytes()) |
688 | } |
689 | |
690 | #[inline (never)] |
691 | fn matches_into( |
692 | &self, |
693 | candidate: &Candidate<'_>, |
694 | matches: &mut Vec<usize>, |
695 | ) { |
696 | if candidate.ext.is_empty() { |
697 | return; |
698 | } |
699 | if let Some(hits) = self.0.get(candidate.ext.as_bytes()) { |
700 | matches.extend(hits); |
701 | } |
702 | } |
703 | } |
704 | |
705 | #[derive (Clone, Debug)] |
706 | struct PrefixStrategy { |
707 | matcher: AhoCorasick, |
708 | map: Vec<usize>, |
709 | longest: usize, |
710 | } |
711 | |
712 | impl PrefixStrategy { |
713 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
714 | let path: &[u8] = candidate.path_prefix(self.longest); |
715 | for m: Match in self.matcher.find_overlapping_iter(input:path) { |
716 | if m.start() == 0 { |
717 | return true; |
718 | } |
719 | } |
720 | false |
721 | } |
722 | |
723 | fn matches_into( |
724 | &self, |
725 | candidate: &Candidate<'_>, |
726 | matches: &mut Vec<usize>, |
727 | ) { |
728 | let path: &[u8] = candidate.path_prefix(self.longest); |
729 | for m: Match in self.matcher.find_overlapping_iter(input:path) { |
730 | if m.start() == 0 { |
731 | matches.push(self.map[m.pattern()]); |
732 | } |
733 | } |
734 | } |
735 | } |
736 | |
737 | #[derive (Clone, Debug)] |
738 | struct SuffixStrategy { |
739 | matcher: AhoCorasick, |
740 | map: Vec<usize>, |
741 | longest: usize, |
742 | } |
743 | |
744 | impl SuffixStrategy { |
745 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
746 | let path: &[u8] = candidate.path_suffix(self.longest); |
747 | for m: Match in self.matcher.find_overlapping_iter(input:path) { |
748 | if m.end() == path.len() { |
749 | return true; |
750 | } |
751 | } |
752 | false |
753 | } |
754 | |
755 | fn matches_into( |
756 | &self, |
757 | candidate: &Candidate<'_>, |
758 | matches: &mut Vec<usize>, |
759 | ) { |
760 | let path: &[u8] = candidate.path_suffix(self.longest); |
761 | for m: Match in self.matcher.find_overlapping_iter(input:path) { |
762 | if m.end() == path.len() { |
763 | matches.push(self.map[m.pattern()]); |
764 | } |
765 | } |
766 | } |
767 | } |
768 | |
769 | #[derive (Clone, Debug)] |
770 | struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>); |
771 | |
772 | impl RequiredExtensionStrategy { |
773 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
774 | if candidate.ext.is_empty() { |
775 | return false; |
776 | } |
777 | match self.0.get(candidate.ext.as_bytes()) { |
778 | None => false, |
779 | Some(regexes) => { |
780 | for &(_, ref re) in regexes { |
781 | if re.is_match(candidate.path.as_bytes()) { |
782 | return true; |
783 | } |
784 | } |
785 | false |
786 | } |
787 | } |
788 | } |
789 | |
790 | #[inline (never)] |
791 | fn matches_into( |
792 | &self, |
793 | candidate: &Candidate<'_>, |
794 | matches: &mut Vec<usize>, |
795 | ) { |
796 | if candidate.ext.is_empty() { |
797 | return; |
798 | } |
799 | if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) { |
800 | for &(global_index, ref re) in regexes { |
801 | if re.is_match(candidate.path.as_bytes()) { |
802 | matches.push(global_index); |
803 | } |
804 | } |
805 | } |
806 | } |
807 | } |
808 | |
809 | #[derive (Clone, Debug)] |
810 | struct RegexSetStrategy { |
811 | matcher: Regex, |
812 | map: Vec<usize>, |
813 | // We use a pool of PatternSets to hopefully allocating a fresh one on each |
814 | // call. |
815 | // |
816 | // TODO: In the next semver breaking release, we should drop this pool and |
817 | // expose an opaque type that wraps PatternSet. Then callers can provide |
818 | // it to `matches_into` directly. Callers might still want to use a pool |
819 | // or similar to amortize allocation, but that matches the status quo and |
820 | // absolves us of needing to do it here. |
821 | patset: Arc<Pool<PatternSet, PatternSetPoolFn>>, |
822 | } |
823 | |
824 | type PatternSetPoolFn = |
825 | Box<dyn Fn() -> PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>; |
826 | |
827 | impl RegexSetStrategy { |
828 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
829 | self.matcher.is_match(input:candidate.path.as_bytes()) |
830 | } |
831 | |
832 | fn matches_into( |
833 | &self, |
834 | candidate: &Candidate<'_>, |
835 | matches: &mut Vec<usize>, |
836 | ) { |
837 | let input: Input<'_> = regex_automata::Input::new(haystack:candidate.path.as_bytes()); |
838 | let mut patset: PoolGuard<'_, PatternSet, …> = self.patset.get(); |
839 | patset.clear(); |
840 | self.matcher.which_overlapping_matches(&input, &mut patset); |
841 | for i: PatternID in patset.iter() { |
842 | matches.push(self.map[i]); |
843 | } |
844 | PoolGuard::put(this:patset); |
845 | } |
846 | } |
847 | |
848 | #[derive (Clone, Debug)] |
849 | struct MultiStrategyBuilder { |
850 | literals: Vec<String>, |
851 | map: Vec<usize>, |
852 | longest: usize, |
853 | } |
854 | |
855 | impl MultiStrategyBuilder { |
856 | fn new() -> MultiStrategyBuilder { |
857 | MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 } |
858 | } |
859 | |
860 | fn add(&mut self, global_index: usize, literal: String) { |
861 | if literal.len() > self.longest { |
862 | self.longest = literal.len(); |
863 | } |
864 | self.map.push(global_index); |
865 | self.literals.push(literal); |
866 | } |
867 | |
868 | fn prefix(self) -> PrefixStrategy { |
869 | PrefixStrategy { |
870 | matcher: AhoCorasick::new(&self.literals).unwrap(), |
871 | map: self.map, |
872 | longest: self.longest, |
873 | } |
874 | } |
875 | |
876 | fn suffix(self) -> SuffixStrategy { |
877 | SuffixStrategy { |
878 | matcher: AhoCorasick::new(&self.literals).unwrap(), |
879 | map: self.map, |
880 | longest: self.longest, |
881 | } |
882 | } |
883 | |
884 | fn regex_set(self) -> Result<RegexSetStrategy, Error> { |
885 | let matcher = new_regex_set(self.literals)?; |
886 | let pattern_len = matcher.pattern_len(); |
887 | let create: PatternSetPoolFn = |
888 | Box::new(move || PatternSet::new(pattern_len)); |
889 | Ok(RegexSetStrategy { |
890 | matcher, |
891 | map: self.map, |
892 | patset: Arc::new(Pool::new(create)), |
893 | }) |
894 | } |
895 | } |
896 | |
897 | #[derive (Clone, Debug)] |
898 | struct RequiredExtensionStrategyBuilder( |
899 | fnv::HashMap<Vec<u8>, Vec<(usize, String)>>, |
900 | ); |
901 | |
902 | impl RequiredExtensionStrategyBuilder { |
903 | fn new() -> RequiredExtensionStrategyBuilder { |
904 | RequiredExtensionStrategyBuilder(fnv::HashMap::default()) |
905 | } |
906 | |
907 | fn add(&mut self, global_index: usize, ext: String, regex: String) { |
908 | self.0 |
909 | .entry(ext.into_bytes()) |
910 | .or_insert(default:vec![]) |
911 | .push((global_index, regex)); |
912 | } |
913 | |
914 | fn build(self) -> Result<RequiredExtensionStrategy, Error> { |
915 | let mut exts: HashMap, Vec<(usize, …)>, …> = fnv::HashMap::default(); |
916 | for (ext: Vec, regexes: Vec<(usize, String)>) in self.0.into_iter() { |
917 | exts.insert(k:ext.clone(), v:vec![]); |
918 | for (global_index: usize, regex: String) in regexes { |
919 | let compiled: Regex = new_regex(®ex)?; |
920 | exts.get_mut(&ext).unwrap().push((global_index, compiled)); |
921 | } |
922 | } |
923 | Ok(RequiredExtensionStrategy(exts)) |
924 | } |
925 | } |
926 | |
/// Escape meta-characters within the given glob pattern.
///
/// The escaping works by surrounding meta-characters with brackets. For
/// example, `*` becomes `[*]`.
///
/// The characters escaped are `?`, `*`, `[`, `]`, `{` and `}`. The braces are
/// included because `{a,b}` is alternation syntax; without escaping them, the
/// result would match `a` or `b` rather than the literal text.
pub fn escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            // note that ! does not need escaping because it is only special
            // inside brackets, and `,` does not need escaping because it is
            // only special inside braces, which are themselves escaped here
            '?' | '*' | '[' | ']' | '{' | '}' => {
                escaped.push('[');
                escaped.push(c);
                escaped.push(']');
            }
            _ => escaped.push(c),
        }
    }
    escaped
}
949 | |
#[cfg(test)]
mod tests {
    use crate::glob::Glob;

    use super::{GlobSet, GlobSetBuilder};

    // Builds a set from the given patterns, panicking if any glob or the set
    // itself fails to build.
    fn build_set(globs: &[&str]) -> GlobSet {
        let mut builder = GlobSetBuilder::new();
        for &glob in globs {
            builder.add(Glob::new(glob).unwrap());
        }
        builder.build().unwrap()
    }

    #[test]
    fn set_works() {
        let set = build_set(&["src/**/*.rs", "*.c", "src/lib.rs"]);

        assert!(set.is_match("foo.c"));
        assert!(set.is_match("src/foo.c"));
        assert!(!set.is_match("foo.rs"));
        assert!(!set.is_match("tests/foo.rs"));
        assert!(set.is_match("src/foo.rs"));
        assert!(set.is_match("src/grep/src/main.rs"));

        // Both the recursive glob and the literal glob match this path.
        assert_eq!(vec![0, 2], set.matches("src/lib.rs"));
    }

    #[test]
    fn empty_set_works() {
        let set = build_set(&[]);
        assert!(!set.is_match(""));
        assert!(!set.is_match("a"));
    }

    #[test]
    fn default_set_is_empty_works() {
        let set = GlobSet::default();
        assert!(!set.is_match(""));
        assert!(!set.is_match("a"));
    }

    #[test]
    fn escape() {
        use super::escape;

        // Each entry is (expected, input).
        let cases = [
            ("foo", "foo"),
            ("foo[*]", "foo*"),
            ("[[][]]", "[]"),
            ("[*][?]", "*?"),
            ("src/[*][*]/[*].rs", "src/**/*.rs"),
            ("bar[[]ab[]]baz", "bar[ab]baz"),
            ("bar[[]!![]]!baz", "bar[!!]!baz"),
        ];
        for (expected, input) in cases.iter() {
            assert_eq!(*expected, escape(input));
        }
    }

    // This tests that regex matching doesn't "remember" the results of
    // previous searches. That is, if any memory is reused from a previous
    // search, then it should be cleared first.
    #[test]
    fn set_does_not_remember() {
        let set = build_set(&["*foo*", "*bar*", "*quux*"]);

        assert_eq!(vec![0, 2], set.matches("ZfooZquuxZ"));
        assert!(set.matches("nada").is_empty());
    }
}
1023 | |