1 | /*! |
2 | The globset crate provides cross platform single glob and glob set matching. |
3 | |
4 | Glob set matching is the process of matching one or more glob patterns against |
5 | a single candidate path simultaneously, and returning all of the globs that |
6 | matched. For example, given this set of globs: |
7 | |
8 | ```ignore |
9 | *.rs |
10 | src/lib.rs |
11 | src/**/foo.rs |
12 | ``` |
13 | |
14 | and a path `src/bar/baz/foo.rs`, then the set would report the first and third |
15 | globs as matching. |
16 | |
17 | # Example: one glob |
18 | |
19 | This example shows how to match a single glob against a single file path. |
20 | |
21 | ``` |
22 | # fn example() -> Result<(), globset::Error> { |
23 | use globset::Glob; |
24 | |
25 | let glob = Glob::new("*.rs" )?.compile_matcher(); |
26 | |
27 | assert!(glob.is_match("foo.rs" )); |
28 | assert!(glob.is_match("foo/bar.rs" )); |
29 | assert!(!glob.is_match("Cargo.toml" )); |
30 | # Ok(()) } example().unwrap(); |
31 | ``` |
32 | |
33 | # Example: configuring a glob matcher |
34 | |
35 | This example shows how to use a `GlobBuilder` to configure aspects of match |
36 | semantics. In this example, we prevent wildcards from matching path separators. |
37 | |
38 | ``` |
39 | # fn example() -> Result<(), globset::Error> { |
40 | use globset::GlobBuilder; |
41 | |
42 | let glob = GlobBuilder::new("*.rs" ) |
43 | .literal_separator(true).build()?.compile_matcher(); |
44 | |
45 | assert!(glob.is_match("foo.rs" )); |
46 | assert!(!glob.is_match("foo/bar.rs" )); // no longer matches |
47 | assert!(!glob.is_match("Cargo.toml" )); |
48 | # Ok(()) } example().unwrap(); |
49 | ``` |
50 | |
51 | # Example: match multiple globs at once |
52 | |
53 | This example shows how to match multiple glob patterns at once. |
54 | |
55 | ``` |
56 | # fn example() -> Result<(), globset::Error> { |
57 | use globset::{Glob, GlobSetBuilder}; |
58 | |
59 | let mut builder = GlobSetBuilder::new(); |
60 | // A GlobBuilder can be used to configure each glob's match semantics |
61 | // independently. |
62 | builder.add(Glob::new("*.rs" )?); |
63 | builder.add(Glob::new("src/lib.rs" )?); |
64 | builder.add(Glob::new("src/**/foo.rs" )?); |
65 | let set = builder.build()?; |
66 | |
67 | assert_eq!(set.matches("src/bar/baz/foo.rs" ), vec![0, 2]); |
68 | # Ok(()) } example().unwrap(); |
69 | ``` |
70 | |
71 | # Syntax |
72 | |
73 | Standard Unix-style glob syntax is supported: |
74 | |
75 | * `?` matches any single character. (If the `literal_separator` option is |
76 | enabled, then `?` can never match a path separator.) |
77 | * `*` matches zero or more characters. (If the `literal_separator` option is |
78 | enabled, then `*` can never match a path separator.) |
* `**` recursively matches directories but is only legal in three situations.
80 | First, if the glob starts with <code>\*\*/</code>, then it matches |
81 | all directories. For example, <code>\*\*/foo</code> matches `foo` |
82 | and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with |
83 | <code>/\*\*</code>, then it matches all sub-entries. For example, |
84 | <code>foo/\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`. |
85 | Thirdly, if the glob contains <code>/\*\*/</code> anywhere within |
86 | the pattern, then it matches zero or more directories. Using `**` anywhere |
87 | else is illegal (N.B. the glob `**` is allowed and means "match everything"). |
88 | * `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns. |
89 | (N.B. Nesting `{...}` is not currently allowed.) |
90 | * `[ab]` matches `a` or `b` where `a` and `b` are characters. Use |
91 | `[!ab]` to match any character except for `a` and `b`. |
92 | * Metacharacters such as `*` and `?` can be escaped with character class |
93 | notation. e.g., `[*]` matches `*`. |
94 | * When backslash escapes are enabled, a backslash (`\`) will escape all meta |
95 | characters in a glob. If it precedes a non-meta character, then the slash is |
96 | ignored. A `\\` will match a literal `\\`. Note that this mode is only |
97 | enabled on Unix platforms by default, but can be enabled on any platform |
98 | via the `backslash_escape` setting on `Glob`. |
99 | |
100 | A `GlobBuilder` can be used to prevent wildcards from matching path separators, |
101 | or to enable case insensitive matching. |
102 | */ |
103 | |
104 | #![deny (missing_docs)] |
105 | |
106 | use std::borrow::Cow; |
107 | use std::collections::{BTreeMap, HashMap}; |
108 | use std::error::Error as StdError; |
109 | use std::fmt; |
110 | use std::hash; |
111 | use std::path::Path; |
112 | use std::str; |
113 | |
114 | use aho_corasick::AhoCorasick; |
115 | use bstr::{ByteSlice, ByteVec, B}; |
116 | use regex::bytes::{Regex, RegexBuilder, RegexSet}; |
117 | |
118 | use crate::glob::MatchStrategy; |
119 | pub use crate::glob::{Glob, GlobBuilder, GlobMatcher}; |
120 | use crate::pathutil::{file_name, file_name_ext, normalize_path}; |
121 | |
122 | mod glob; |
123 | mod pathutil; |
124 | |
125 | #[cfg (feature = "serde1" )] |
126 | mod serde_impl; |
127 | |
// Logging shim used throughout this file. With the `log` feature enabled,
// `debug!` forwards all of its tokens to `log::debug!`.
#[cfg(feature = "log")]
macro_rules! debug {
    ($($token:tt)*) => {
        ::log::debug!($($token)*);
    };
}

// Without the `log` feature, `debug!` accepts any tokens and expands to
// nothing, so call sites do not need their own feature gates.
#[cfg(not(feature = "log"))]
macro_rules! debug {
    ($($token:tt)*) => {};
}
137 | |
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
    /// The original glob provided by the caller, if one is known.
    glob: Option<String>,
    /// The kind of error.
    kind: ErrorKind,
}

/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// **DEPRECATED**.
    ///
    /// This error used to occur for consistency with git's glob specification,
    /// but the specification now accepts all uses of `**`. When `**` does not
    /// appear adjacent to a path separator or at the beginning/end of a glob,
    /// it is now treated as two consecutive `*` patterns. As such, this error
    /// is no longer used.
    InvalidRecursive,
    /// Occurs when a character class (e.g., `[abc]`) is not closed.
    UnclosedClass,
    /// Occurs when a range in a character class (e.g., `[a-z]`) is invalid.
    /// For example, if the range starts with a lexicographically larger
    /// character than it ends with. The two fields are the start and end of
    /// the offending range, in that order.
    InvalidRange(char, char),
    /// Occurs when a `}` is found without a matching `{`.
    UnopenedAlternates,
    /// Occurs when a `{` is found without a matching `}`.
    UnclosedAlternates,
    /// Occurs when an alternating group is nested inside another alternating
    /// group, e.g., `{{a,b},{c,d}}`.
    NestedAlternates,
    /// Occurs when an unescaped '\' is found at the end of a glob.
    DanglingEscape,
    /// An error associated with parsing or compiling a regex. The payload is
    /// the message of the underlying regex error.
    Regex(String),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// This enum may grow additional variants, so this makes sure clients
    /// don't count on exhaustive matching. (Otherwise, adding a new variant
    /// could break existing code.)
    #[doc(hidden)]
    __Nonexhaustive,
}

impl StdError for Error {
    fn description(&self) -> &str {
        self.kind.description()
    }
}

impl Error {
    /// Return the glob that caused this error, if one exists.
    pub fn glob(&self) -> Option<&str> {
        self.glob.as_ref().map(|s| s.as_str())
    }

    /// Return the kind of this error.
    pub fn kind(&self) -> &ErrorKind {
        &self.kind
    }
}

impl ErrorKind {
    /// Returns a short human readable message for this kind of error.
    ///
    /// For `Regex`, the message is the stored regex error text. For
    /// `InvalidRange`, the message is generic; the `Display` impl is the one
    /// that reports the offending characters.
    fn description(&self) -> &str {
        match *self {
            ErrorKind::InvalidRecursive => {
                "invalid use of **; must be one path component"
            }
            ErrorKind::UnclosedClass => {
                "unclosed character class; missing ']'"
            }
            ErrorKind::InvalidRange(_, _) => "invalid character range",
            ErrorKind::UnopenedAlternates => {
                "unopened alternate group; missing '{' \
                 (maybe escape '}' with '[}]'?)"
            }
            ErrorKind::UnclosedAlternates => {
                "unclosed alternate group; missing '}' \
                 (maybe escape '{' with '[{]'?)"
            }
            ErrorKind::NestedAlternates => {
                "nested alternate groups are not allowed"
            }
            // No padding inside the quotes: the message names the backslash
            // itself.
            ErrorKind::DanglingEscape => "dangling '\\'",
            ErrorKind::Regex(ref err) => err,
            ErrorKind::__Nonexhaustive => unreachable!(),
        }
    }
}

impl fmt::Display for Error {
    /// Writes the error kind, prefixed with the offending glob when known.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.glob {
            // Fully qualified so the call can only resolve to `Display`.
            None => fmt::Display::fmt(&self.kind, f),
            Some(ref glob) => {
                write!(f, "error parsing glob '{}': {}", glob, self.kind)
            }
        }
    }
}

impl fmt::Display for ErrorKind {
    /// Writes the kind's description; `InvalidRange` additionally reports the
    /// two characters that formed the invalid range.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            ErrorKind::InvalidRecursive
            | ErrorKind::UnclosedClass
            | ErrorKind::UnopenedAlternates
            | ErrorKind::UnclosedAlternates
            | ErrorKind::NestedAlternates
            | ErrorKind::DanglingEscape
            | ErrorKind::Regex(_) => write!(f, "{}", self.description()),
            ErrorKind::InvalidRange(s, e) => {
                write!(f, "invalid range; '{}' > '{}'", s, e)
            }
            ErrorKind::__Nonexhaustive => unreachable!(),
        }
    }
}
258 | |
259 | fn new_regex(pat: &str) -> Result<Regex, Error> { |
260 | RegexBuilder::new(pat) |
261 | .dot_matches_new_line(true) |
262 | .size_limit(10 * (1 << 20)) |
263 | .dfa_size_limit(10 * (1 << 20)) |
264 | .build() |
265 | .map_err(|err: Error| Error { |
266 | glob: Some(pat.to_string()), |
267 | kind: ErrorKind::Regex(err.to_string()), |
268 | }) |
269 | } |
270 | |
271 | fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error> |
272 | where |
273 | S: AsRef<str>, |
274 | I: IntoIterator<Item = S>, |
275 | { |
276 | RegexSet::new(pats).map_err(|err: Error| Error { |
277 | glob: None, |
278 | kind: ErrorKind::Regex(err.to_string()), |
279 | }) |
280 | } |
281 | |
/// Hash-builder alias for the extension-keyed `HashMap`s in this file
/// (`ExtensionStrategy` and `RequiredExtensionStrategy`); it selects the FNV
/// hasher in place of the standard library's default.
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
283 | |
284 | /// GlobSet represents a group of globs that can be matched together in a |
285 | /// single pass. |
286 | #[derive (Clone, Debug)] |
287 | pub struct GlobSet { |
288 | len: usize, |
289 | strats: Vec<GlobSetMatchStrategy>, |
290 | } |
291 | |
292 | impl GlobSet { |
293 | /// Create an empty `GlobSet`. An empty set matches nothing. |
294 | #[inline ] |
295 | pub fn empty() -> GlobSet { |
296 | GlobSet { len: 0, strats: vec![] } |
297 | } |
298 | |
299 | /// Returns true if this set is empty, and therefore matches nothing. |
300 | #[inline ] |
301 | pub fn is_empty(&self) -> bool { |
302 | self.len == 0 |
303 | } |
304 | |
305 | /// Returns the number of globs in this set. |
306 | #[inline ] |
307 | pub fn len(&self) -> usize { |
308 | self.len |
309 | } |
310 | |
311 | /// Returns true if any glob in this set matches the path given. |
312 | pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool { |
313 | self.is_match_candidate(&Candidate::new(path.as_ref())) |
314 | } |
315 | |
316 | /// Returns true if any glob in this set matches the path given. |
317 | /// |
318 | /// This takes a Candidate as input, which can be used to amortize the |
319 | /// cost of preparing a path for matching. |
320 | pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool { |
321 | if self.is_empty() { |
322 | return false; |
323 | } |
324 | for strat in &self.strats { |
325 | if strat.is_match(path) { |
326 | return true; |
327 | } |
328 | } |
329 | false |
330 | } |
331 | |
332 | /// Returns the sequence number of every glob pattern that matches the |
333 | /// given path. |
334 | pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> { |
335 | self.matches_candidate(&Candidate::new(path.as_ref())) |
336 | } |
337 | |
338 | /// Returns the sequence number of every glob pattern that matches the |
339 | /// given path. |
340 | /// |
341 | /// This takes a Candidate as input, which can be used to amortize the |
342 | /// cost of preparing a path for matching. |
343 | pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> { |
344 | let mut into = vec![]; |
345 | if self.is_empty() { |
346 | return into; |
347 | } |
348 | self.matches_candidate_into(path, &mut into); |
349 | into |
350 | } |
351 | |
352 | /// Adds the sequence number of every glob pattern that matches the given |
353 | /// path to the vec given. |
354 | /// |
355 | /// `into` is cleared before matching begins, and contains the set of |
356 | /// sequence numbers (in ascending order) after matching ends. If no globs |
357 | /// were matched, then `into` will be empty. |
358 | pub fn matches_into<P: AsRef<Path>>( |
359 | &self, |
360 | path: P, |
361 | into: &mut Vec<usize>, |
362 | ) { |
363 | self.matches_candidate_into(&Candidate::new(path.as_ref()), into); |
364 | } |
365 | |
366 | /// Adds the sequence number of every glob pattern that matches the given |
367 | /// path to the vec given. |
368 | /// |
369 | /// `into` is cleared before matching begins, and contains the set of |
370 | /// sequence numbers (in ascending order) after matching ends. If no globs |
371 | /// were matched, then `into` will be empty. |
372 | /// |
373 | /// This takes a Candidate as input, which can be used to amortize the |
374 | /// cost of preparing a path for matching. |
375 | pub fn matches_candidate_into( |
376 | &self, |
377 | path: &Candidate<'_>, |
378 | into: &mut Vec<usize>, |
379 | ) { |
380 | into.clear(); |
381 | if self.is_empty() { |
382 | return; |
383 | } |
384 | for strat in &self.strats { |
385 | strat.matches_into(path, into); |
386 | } |
387 | into.sort(); |
388 | into.dedup(); |
389 | } |
390 | |
391 | fn new(pats: &[Glob]) -> Result<GlobSet, Error> { |
392 | if pats.is_empty() { |
393 | return Ok(GlobSet { len: 0, strats: vec![] }); |
394 | } |
395 | let mut lits = LiteralStrategy::new(); |
396 | let mut base_lits = BasenameLiteralStrategy::new(); |
397 | let mut exts = ExtensionStrategy::new(); |
398 | let mut prefixes = MultiStrategyBuilder::new(); |
399 | let mut suffixes = MultiStrategyBuilder::new(); |
400 | let mut required_exts = RequiredExtensionStrategyBuilder::new(); |
401 | let mut regexes = MultiStrategyBuilder::new(); |
402 | for (i, p) in pats.iter().enumerate() { |
403 | match MatchStrategy::new(p) { |
404 | MatchStrategy::Literal(lit) => { |
405 | lits.add(i, lit); |
406 | } |
407 | MatchStrategy::BasenameLiteral(lit) => { |
408 | base_lits.add(i, lit); |
409 | } |
410 | MatchStrategy::Extension(ext) => { |
411 | exts.add(i, ext); |
412 | } |
413 | MatchStrategy::Prefix(prefix) => { |
414 | prefixes.add(i, prefix); |
415 | } |
416 | MatchStrategy::Suffix { suffix, component } => { |
417 | if component { |
418 | lits.add(i, suffix[1..].to_string()); |
419 | } |
420 | suffixes.add(i, suffix); |
421 | } |
422 | MatchStrategy::RequiredExtension(ext) => { |
423 | required_exts.add(i, ext, p.regex().to_owned()); |
424 | } |
425 | MatchStrategy::Regex => { |
426 | debug!("glob converted to regex: {:?}" , p); |
427 | regexes.add(i, p.regex().to_owned()); |
428 | } |
429 | } |
430 | } |
431 | debug!( |
432 | "built glob set; {} literals, {} basenames, {} extensions, \ |
433 | {} prefixes, {} suffixes, {} required extensions, {} regexes" , |
434 | lits.0.len(), |
435 | base_lits.0.len(), |
436 | exts.0.len(), |
437 | prefixes.literals.len(), |
438 | suffixes.literals.len(), |
439 | required_exts.0.len(), |
440 | regexes.literals.len() |
441 | ); |
442 | Ok(GlobSet { |
443 | len: pats.len(), |
444 | strats: vec![ |
445 | GlobSetMatchStrategy::Extension(exts), |
446 | GlobSetMatchStrategy::BasenameLiteral(base_lits), |
447 | GlobSetMatchStrategy::Literal(lits), |
448 | GlobSetMatchStrategy::Suffix(suffixes.suffix()), |
449 | GlobSetMatchStrategy::Prefix(prefixes.prefix()), |
450 | GlobSetMatchStrategy::RequiredExtension( |
451 | required_exts.build()?, |
452 | ), |
453 | GlobSetMatchStrategy::Regex(regexes.regex_set()?), |
454 | ], |
455 | }) |
456 | } |
457 | } |
458 | |
459 | impl Default for GlobSet { |
460 | /// Create a default empty GlobSet. |
461 | fn default() -> Self { |
462 | GlobSet::empty() |
463 | } |
464 | } |
465 | |
466 | /// GlobSetBuilder builds a group of patterns that can be used to |
467 | /// simultaneously match a file path. |
468 | #[derive (Clone, Debug)] |
469 | pub struct GlobSetBuilder { |
470 | pats: Vec<Glob>, |
471 | } |
472 | |
473 | impl GlobSetBuilder { |
474 | /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new |
475 | /// patterns. Once all patterns have been added, `build` should be called |
476 | /// to produce a `GlobSet`, which can then be used for matching. |
477 | pub fn new() -> GlobSetBuilder { |
478 | GlobSetBuilder { pats: vec![] } |
479 | } |
480 | |
481 | /// Builds a new matcher from all of the glob patterns added so far. |
482 | /// |
483 | /// Once a matcher is built, no new patterns can be added to it. |
484 | pub fn build(&self) -> Result<GlobSet, Error> { |
485 | GlobSet::new(&self.pats) |
486 | } |
487 | |
488 | /// Add a new pattern to this set. |
489 | pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder { |
490 | self.pats.push(pat); |
491 | self |
492 | } |
493 | } |
494 | |
/// A path prepared for matching.
///
/// Every matcher in this crate works on `Candidate` values. Building one has
/// a small cost, so callers that match the same path against several globs or
/// glob sets may want to build the candidate once and reuse it.
#[derive(Clone, Debug)]
pub struct Candidate<'a> {
    /// The bytes of the path, after normalization.
    path: Cow<'a, [u8]>,
    /// The file name portion of `path`; empty when there is none.
    basename: Cow<'a, [u8]>,
    /// The extension derived from `basename`; empty when there is none.
    ext: Cow<'a, [u8]>,
}
507 | |
508 | impl<'a> Candidate<'a> { |
509 | /// Create a new candidate for matching from the given path. |
510 | pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> { |
511 | let path = normalize_path(Vec::from_path_lossy(path.as_ref())); |
512 | let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("" ))); |
513 | let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("" ))); |
514 | Candidate { path: path, basename: basename, ext: ext } |
515 | } |
516 | |
517 | fn path_prefix(&self, max: usize) -> &[u8] { |
518 | if self.path.len() <= max { |
519 | &*self.path |
520 | } else { |
521 | &self.path[..max] |
522 | } |
523 | } |
524 | |
525 | fn path_suffix(&self, max: usize) -> &[u8] { |
526 | if self.path.len() <= max { |
527 | &*self.path |
528 | } else { |
529 | &self.path[self.path.len() - max..] |
530 | } |
531 | } |
532 | } |
533 | |
534 | #[derive (Clone, Debug)] |
535 | enum GlobSetMatchStrategy { |
536 | Literal(LiteralStrategy), |
537 | BasenameLiteral(BasenameLiteralStrategy), |
538 | Extension(ExtensionStrategy), |
539 | Prefix(PrefixStrategy), |
540 | Suffix(SuffixStrategy), |
541 | RequiredExtension(RequiredExtensionStrategy), |
542 | Regex(RegexSetStrategy), |
543 | } |
544 | |
545 | impl GlobSetMatchStrategy { |
546 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
547 | use self::GlobSetMatchStrategy::*; |
548 | match *self { |
549 | Literal(ref s) => s.is_match(candidate), |
550 | BasenameLiteral(ref s) => s.is_match(candidate), |
551 | Extension(ref s) => s.is_match(candidate), |
552 | Prefix(ref s) => s.is_match(candidate), |
553 | Suffix(ref s) => s.is_match(candidate), |
554 | RequiredExtension(ref s) => s.is_match(candidate), |
555 | Regex(ref s) => s.is_match(candidate), |
556 | } |
557 | } |
558 | |
559 | fn matches_into( |
560 | &self, |
561 | candidate: &Candidate<'_>, |
562 | matches: &mut Vec<usize>, |
563 | ) { |
564 | use self::GlobSetMatchStrategy::*; |
565 | match *self { |
566 | Literal(ref s) => s.matches_into(candidate, matches), |
567 | BasenameLiteral(ref s) => s.matches_into(candidate, matches), |
568 | Extension(ref s) => s.matches_into(candidate, matches), |
569 | Prefix(ref s) => s.matches_into(candidate, matches), |
570 | Suffix(ref s) => s.matches_into(candidate, matches), |
571 | RequiredExtension(ref s) => s.matches_into(candidate, matches), |
572 | Regex(ref s) => s.matches_into(candidate, matches), |
573 | } |
574 | } |
575 | } |
576 | |
577 | #[derive (Clone, Debug)] |
578 | struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>); |
579 | |
580 | impl LiteralStrategy { |
581 | fn new() -> LiteralStrategy { |
582 | LiteralStrategy(BTreeMap::new()) |
583 | } |
584 | |
585 | fn add(&mut self, global_index: usize, lit: String) { |
586 | self.0.entry(lit.into_bytes()).or_insert(default:vec![]).push(global_index); |
587 | } |
588 | |
589 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
590 | self.0.contains_key(candidate.path.as_bytes()) |
591 | } |
592 | |
593 | #[inline (never)] |
594 | fn matches_into( |
595 | &self, |
596 | candidate: &Candidate<'_>, |
597 | matches: &mut Vec<usize>, |
598 | ) { |
599 | if let Some(hits: &Vec) = self.0.get(key:candidate.path.as_bytes()) { |
600 | matches.extend(iter:hits); |
601 | } |
602 | } |
603 | } |
604 | |
605 | #[derive (Clone, Debug)] |
606 | struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>); |
607 | |
608 | impl BasenameLiteralStrategy { |
609 | fn new() -> BasenameLiteralStrategy { |
610 | BasenameLiteralStrategy(BTreeMap::new()) |
611 | } |
612 | |
613 | fn add(&mut self, global_index: usize, lit: String) { |
614 | self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); |
615 | } |
616 | |
617 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
618 | if candidate.basename.is_empty() { |
619 | return false; |
620 | } |
621 | self.0.contains_key(candidate.basename.as_bytes()) |
622 | } |
623 | |
624 | #[inline (never)] |
625 | fn matches_into( |
626 | &self, |
627 | candidate: &Candidate<'_>, |
628 | matches: &mut Vec<usize>, |
629 | ) { |
630 | if candidate.basename.is_empty() { |
631 | return; |
632 | } |
633 | if let Some(hits) = self.0.get(candidate.basename.as_bytes()) { |
634 | matches.extend(hits); |
635 | } |
636 | } |
637 | } |
638 | |
639 | #[derive (Clone, Debug)] |
640 | struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>); |
641 | |
642 | impl ExtensionStrategy { |
643 | fn new() -> ExtensionStrategy { |
644 | ExtensionStrategy(HashMap::with_hasher(Fnv::default())) |
645 | } |
646 | |
647 | fn add(&mut self, global_index: usize, ext: String) { |
648 | self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index); |
649 | } |
650 | |
651 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
652 | if candidate.ext.is_empty() { |
653 | return false; |
654 | } |
655 | self.0.contains_key(candidate.ext.as_bytes()) |
656 | } |
657 | |
658 | #[inline (never)] |
659 | fn matches_into( |
660 | &self, |
661 | candidate: &Candidate<'_>, |
662 | matches: &mut Vec<usize>, |
663 | ) { |
664 | if candidate.ext.is_empty() { |
665 | return; |
666 | } |
667 | if let Some(hits) = self.0.get(candidate.ext.as_bytes()) { |
668 | matches.extend(hits); |
669 | } |
670 | } |
671 | } |
672 | |
673 | #[derive (Clone, Debug)] |
674 | struct PrefixStrategy { |
675 | matcher: AhoCorasick, |
676 | map: Vec<usize>, |
677 | longest: usize, |
678 | } |
679 | |
680 | impl PrefixStrategy { |
681 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
682 | let path: &[u8] = candidate.path_prefix(self.longest); |
683 | for m: Match in self.matcher.find_overlapping_iter(haystack:path) { |
684 | if m.start() == 0 { |
685 | return true; |
686 | } |
687 | } |
688 | false |
689 | } |
690 | |
691 | fn matches_into( |
692 | &self, |
693 | candidate: &Candidate<'_>, |
694 | matches: &mut Vec<usize>, |
695 | ) { |
696 | let path: &[u8] = candidate.path_prefix(self.longest); |
697 | for m: Match in self.matcher.find_overlapping_iter(haystack:path) { |
698 | if m.start() == 0 { |
699 | matches.push(self.map[m.pattern()]); |
700 | } |
701 | } |
702 | } |
703 | } |
704 | |
705 | #[derive (Clone, Debug)] |
706 | struct SuffixStrategy { |
707 | matcher: AhoCorasick, |
708 | map: Vec<usize>, |
709 | longest: usize, |
710 | } |
711 | |
712 | impl SuffixStrategy { |
713 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
714 | let path: &[u8] = candidate.path_suffix(self.longest); |
715 | for m: Match in self.matcher.find_overlapping_iter(haystack:path) { |
716 | if m.end() == path.len() { |
717 | return true; |
718 | } |
719 | } |
720 | false |
721 | } |
722 | |
723 | fn matches_into( |
724 | &self, |
725 | candidate: &Candidate<'_>, |
726 | matches: &mut Vec<usize>, |
727 | ) { |
728 | let path: &[u8] = candidate.path_suffix(self.longest); |
729 | for m: Match in self.matcher.find_overlapping_iter(haystack:path) { |
730 | if m.end() == path.len() { |
731 | matches.push(self.map[m.pattern()]); |
732 | } |
733 | } |
734 | } |
735 | } |
736 | |
737 | #[derive (Clone, Debug)] |
738 | struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>); |
739 | |
740 | impl RequiredExtensionStrategy { |
741 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
742 | if candidate.ext.is_empty() { |
743 | return false; |
744 | } |
745 | match self.0.get(candidate.ext.as_bytes()) { |
746 | None => false, |
747 | Some(regexes) => { |
748 | for &(_, ref re) in regexes { |
749 | if re.is_match(candidate.path.as_bytes()) { |
750 | return true; |
751 | } |
752 | } |
753 | false |
754 | } |
755 | } |
756 | } |
757 | |
758 | #[inline (never)] |
759 | fn matches_into( |
760 | &self, |
761 | candidate: &Candidate<'_>, |
762 | matches: &mut Vec<usize>, |
763 | ) { |
764 | if candidate.ext.is_empty() { |
765 | return; |
766 | } |
767 | if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) { |
768 | for &(global_index, ref re) in regexes { |
769 | if re.is_match(candidate.path.as_bytes()) { |
770 | matches.push(global_index); |
771 | } |
772 | } |
773 | } |
774 | } |
775 | } |
776 | |
777 | #[derive (Clone, Debug)] |
778 | struct RegexSetStrategy { |
779 | matcher: RegexSet, |
780 | map: Vec<usize>, |
781 | } |
782 | |
783 | impl RegexSetStrategy { |
784 | fn is_match(&self, candidate: &Candidate<'_>) -> bool { |
785 | self.matcher.is_match(text:candidate.path.as_bytes()) |
786 | } |
787 | |
788 | fn matches_into( |
789 | &self, |
790 | candidate: &Candidate<'_>, |
791 | matches: &mut Vec<usize>, |
792 | ) { |
793 | for i: usize in self.matcher.matches(text:candidate.path.as_bytes()) { |
794 | matches.push(self.map[i]); |
795 | } |
796 | } |
797 | } |
798 | |
799 | #[derive (Clone, Debug)] |
800 | struct MultiStrategyBuilder { |
801 | literals: Vec<String>, |
802 | map: Vec<usize>, |
803 | longest: usize, |
804 | } |
805 | |
806 | impl MultiStrategyBuilder { |
807 | fn new() -> MultiStrategyBuilder { |
808 | MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 } |
809 | } |
810 | |
811 | fn add(&mut self, global_index: usize, literal: String) { |
812 | if literal.len() > self.longest { |
813 | self.longest = literal.len(); |
814 | } |
815 | self.map.push(global_index); |
816 | self.literals.push(literal); |
817 | } |
818 | |
819 | fn prefix(self) -> PrefixStrategy { |
820 | PrefixStrategy { |
821 | matcher: AhoCorasick::new_auto_configured(&self.literals), |
822 | map: self.map, |
823 | longest: self.longest, |
824 | } |
825 | } |
826 | |
827 | fn suffix(self) -> SuffixStrategy { |
828 | SuffixStrategy { |
829 | matcher: AhoCorasick::new_auto_configured(&self.literals), |
830 | map: self.map, |
831 | longest: self.longest, |
832 | } |
833 | } |
834 | |
835 | fn regex_set(self) -> Result<RegexSetStrategy, Error> { |
836 | Ok(RegexSetStrategy { |
837 | matcher: new_regex_set(self.literals)?, |
838 | map: self.map, |
839 | }) |
840 | } |
841 | } |
842 | |
843 | #[derive (Clone, Debug)] |
844 | struct RequiredExtensionStrategyBuilder( |
845 | HashMap<Vec<u8>, Vec<(usize, String)>>, |
846 | ); |
847 | |
848 | impl RequiredExtensionStrategyBuilder { |
849 | fn new() -> RequiredExtensionStrategyBuilder { |
850 | RequiredExtensionStrategyBuilder(HashMap::new()) |
851 | } |
852 | |
853 | fn add(&mut self, global_index: usize, ext: String, regex: String) { |
854 | self.0 |
855 | .entry(ext.into_bytes()) |
856 | .or_insert(default:vec![]) |
857 | .push((global_index, regex)); |
858 | } |
859 | |
860 | fn build(self) -> Result<RequiredExtensionStrategy, Error> { |
861 | let mut exts: HashMap, Vec<(usize, …)>, …> = HashMap::with_hasher(hash_builder:Fnv::default()); |
862 | for (ext: Vec, regexes: Vec<(usize, String)>) in self.0.into_iter() { |
863 | exts.insert(k:ext.clone(), v:vec![]); |
864 | for (global_index: usize, regex: String) in regexes { |
865 | let compiled: Regex = new_regex(®ex)?; |
866 | exts.get_mut(&ext).unwrap().push((global_index, compiled)); |
867 | } |
868 | } |
869 | Ok(RequiredExtensionStrategy(exts)) |
870 | } |
871 | } |
872 | |
#[cfg(test)]
mod tests {
    use super::{GlobSet, GlobSetBuilder};
    use crate::glob::Glob;

    /// Asserts that `set` matches neither the empty path nor a short one.
    fn assert_matches_nothing(set: &GlobSet) {
        for path in &["", "a"] {
            assert!(!set.is_match(path));
        }
    }

    /// A mixed set reports the expected hits, misses and sequence numbers.
    #[test]
    fn set_works() {
        let mut builder = GlobSetBuilder::new();
        for pat in &["src/**/*.rs", "*.c", "src/lib.rs"] {
            builder.add(Glob::new(pat).unwrap());
        }
        let set = builder.build().unwrap();

        assert!(set.is_match("foo.c"));
        assert!(set.is_match("src/foo.c"));
        assert!(!set.is_match("foo.rs"));
        assert!(!set.is_match("tests/foo.rs"));
        assert!(set.is_match("src/foo.rs"));
        assert!(set.is_match("src/grep/src/main.rs"));

        // `src/lib.rs` is matched by the first and the third glob.
        let matches = set.matches("src/lib.rs");
        assert_eq!(vec![0, 2], matches);
    }

    /// A set built from no patterns matches nothing.
    #[test]
    fn empty_set_works() {
        let set = GlobSetBuilder::new().build().unwrap();
        assert_matches_nothing(&set);
    }

    /// The `Default` set matches nothing.
    #[test]
    fn default_set_is_empty_works() {
        let set: GlobSet = Default::default();
        assert_matches_nothing(&set);
    }
}
913 | |