lib.rs source code [crates/globset-0.4.10/src/lib.rs]

/*!
2	The globset crate provides cross platform single glob and glob set matching.
3
4	Glob set matching is the process of matching one or more glob patterns against
5	a single candidate path simultaneously, and returning all of the globs that
6	matched. For example, given this set of globs:
7
8	```ignore
9	*.rs
10	src/lib.rs
11	src/**/foo.rs
12	```
13
14	and a path `src/bar/baz/foo.rs`, then the set would report the first and third
15	globs as matching.
16
17	# Example: one glob
18
19	This example shows how to match a single glob against a single file path.
20
21	```
22	# fn example() -> Result<(), globset::Error> {
23	use globset::Glob;
24
25	let glob = Glob::new("*.rs")?.compile_matcher();
26
27	assert!(glob.is_match("foo.rs"));
28	assert!(glob.is_match("foo/bar.rs"));
29	assert!(!glob.is_match("Cargo.toml"));
30	# Ok(()) } example().unwrap();
31	```
32
33	# Example: configuring a glob matcher
34
35	This example shows how to use a `GlobBuilder` to configure aspects of match
36	semantics. In this example, we prevent wildcards from matching path separators.
37
38	```
39	# fn example() -> Result<(), globset::Error> {
40	use globset::GlobBuilder;
41
42	let glob = GlobBuilder::new("*.rs")
    .literal_separator(true).build()?.compile_matcher();
44
45	assert!(glob.is_match("foo.rs"));
46	assert!(!glob.is_match("foo/bar.rs")); // no longer matches
47	assert!(!glob.is_match("Cargo.toml"));
48	# Ok(()) } example().unwrap();
49	```
50
51	# Example: match multiple globs at once
52
53	This example shows how to match multiple glob patterns at once.
54
55	```
56	# fn example() -> Result<(), globset::Error> {
57	use globset::{Glob, GlobSetBuilder};
58
59	let mut builder = GlobSetBuilder::new();
60	// A GlobBuilder can be used to configure each glob's match semantics
61	// independently.
62	builder.add(Glob::new("*.rs")?);
63	builder.add(Glob::new("src/lib.rs")?);
64	builder.add(Glob::new("src/**/foo.rs")?);
65	let set = builder.build()?;
66
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
68	# Ok(()) } example().unwrap();
69	```
70
71	# Syntax
72
73	Standard Unix-style glob syntax is supported:
74
75	* `?` matches any single character. (If the `literal_separator` option is
76	enabled, then `?` can never match a path separator.)
* `*` matches zero or more characters. (If the `literal_separator` option is
  enabled, then `*` can never match a path separator.)
* `**` recursively matches directories but are only legal in three situations.
  First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
  all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
  and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
  <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
  <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
  Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
  the pattern, then it matches zero or more directories. Using `**` anywhere
  else is illegal (N.B. the glob `**` is allowed and means "match everything").
88	* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
89	(N.B. Nesting `{...}` is not currently allowed.)
90	* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
91	`[!ab]` to match any character except for `a` and `b`.
92	* Metacharacters such as `*` and `?` can be escaped with character class
  notation. e.g., `[*]` matches `*`.
94	* When backslash escapes are enabled, a backslash (`\`) will escape all meta
95	characters in a glob. If it precedes a non-meta character, then the slash is
96	ignored. A `\\` will match a literal `\\`. Note that this mode is only
97	enabled on Unix platforms by default, but can be enabled on any platform
98	via the `backslash_escape` setting on `Glob`.
99
100	A `GlobBuilder` can be used to prevent wildcards from matching path separators,
101	or to enable case insensitive matching.
102	*/
103
104	#![deny(missing_docs)]
105
106	use std::borrow::Cow;
107	use std::collections::{BTreeMap, HashMap};
108	use std::error::Error as StdError;
109	use std::fmt;
110	use std::hash;
111	use std::path::Path;
112	use std::str;
113
114	use aho_corasick::AhoCorasick;
115	use bstr::{ByteSlice, ByteVec, B};
116	use regex::bytes::{Regex, RegexBuilder, RegexSet};
117
118	use crate::glob::MatchStrategy;
119	pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
120	use crate::pathutil::{file_name, file_name_ext, normalize_path};
121
122	mod glob;
123	mod pathutil;
124
125	#[cfg(feature = "serde1")]
126	mod serde_impl;
127
// When the `log` feature is enabled, `debug!` forwards every token it is
// given to `log::debug!` unchanged.
#[cfg(feature = "log")]
macro_rules! debug {
    ($($token:tt)*) => (::log::debug!($($token)*);)
}

// Without the `log` feature, `debug!` accepts any tokens and expands to
// nothing, so its arguments are never evaluated.
#[cfg(not(feature = "log"))]
macro_rules! debug {
    ($($token:tt)*) => {};
}
137
/// Represents an error that can occur when parsing a glob pattern.
///
/// An error pairs the kind of failure with the text that triggered it, when
/// that text is known. Both parts are exposed through the `glob` and `kind`
/// accessor methods.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
    /// The original glob provided by the caller.
    ///
    /// This is `None` when the failure is not attributed to a single pattern
    /// (e.g. when compiling a whole regex set fails).
    glob: Option<String>,
    /// The kind of error.
    kind: ErrorKind,
}
146
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// **DEPRECATED**.
    ///
    /// This error used to occur for consistency with git's glob specification,
    /// but the specification now accepts all uses of `**`. When `**` does not
    /// appear adjacent to a path separator or at the beginning/end of a glob,
    /// it is now treated as two consecutive `*` patterns. As such, this error
    /// is no longer used.
    InvalidRecursive,
    /// Occurs when a character class (e.g., `[abc]`) is not closed.
    UnclosedClass,
    /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For
    /// example, if the range starts with a lexicographically larger character
    /// than it ends with.
    ///
    /// The first field is the start of the range and the second is its end.
    InvalidRange(char, char),
    /// Occurs when a `}` is found without a matching `{`.
    UnopenedAlternates,
    /// Occurs when a `{` is found without a matching `}`.
    UnclosedAlternates,
    /// Occurs when an alternating group is nested inside another alternating
    /// group, e.g., `{{a,b},{c,d}}`.
    NestedAlternates,
    /// Occurs when an unescaped '\' is found at the end of a glob.
    DanglingEscape,
    /// An error associated with parsing or compiling a regex.
    ///
    /// The payload is the regex error rendered as a string.
    Regex(String),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// This enum may grow additional variants, so this makes sure clients
    /// don't count on exhaustive matching. (Otherwise, adding a new variant
    /// could break existing code.)
    #[doc(hidden)]
    __Nonexhaustive,
}
183
184	impl StdError for Error {
185	fn description(&self) -> &str {
186	self.kind.description()
187	}
188	}
189
190	impl Error {
191	/// Return the glob that caused this error, if one exists.
192	pub fn glob(&self) -> Option<&str> {
193	self.glob.as_ref().map(\|s: &String\| &**s)
194	}
195
196	/// Return the kind of this error.
197	pub fn kind(&self) -> &ErrorKind {
198	&self.kind
199	}
200	}
201
202	impl ErrorKind {
203	fn description(&self) -> &str {
204	match *self {
205	ErrorKind::InvalidRecursive => {
206	"invalid use of **; must be one path component"
207	}
208	ErrorKind::UnclosedClass => {
209	"unclosed character class; missing ']'"
210	}
211	ErrorKind::InvalidRange(_, _) => "invalid character range",
212	ErrorKind::UnopenedAlternates => {
213	"unopened alternate group; missing '{' \
214	(maybe escape '}' with '[}]'?)"
215	}
216	ErrorKind::UnclosedAlternates => {
217	"unclosed alternate group; missing '}' \
218	(maybe escape '{' with '[{]'?)"
219	}
220	ErrorKind::NestedAlternates => {
221	"nested alternate groups are not allowed"
222	}
223	ErrorKind::DanglingEscape => "dangling '`\\`'",
224	ErrorKind::Regex(ref err) => err,
225	ErrorKind::__Nonexhaustive => unreachable!(),
226	}
227	}
228	}
229
230	impl fmt::Display for Error {
231	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
232	match self.glob {
233	None => self.kind.fmt(f),
234	Some(ref glob: &String) => {
235	write!(f, "error parsing glob '{}': {}", glob, self.kind)
236	}
237	}
238	}
239	}
240
241	impl fmt::Display for ErrorKind {
242	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
243	match *self {
244	ErrorKind::InvalidRecursive
245	\| ErrorKind::UnclosedClass
246	\| ErrorKind::UnopenedAlternates
247	\| ErrorKind::UnclosedAlternates
248	\| ErrorKind::NestedAlternates
249	\| ErrorKind::DanglingEscape
250	\| ErrorKind::Regex(_) => write!(f, "{}", self.description()),
251	ErrorKind::InvalidRange(s: char, e: char) => {
252	write!(f, "invalid range; '{}' > '{}'", s, e)
253	}
254	ErrorKind::__Nonexhaustive => unreachable!(),
255	}
256	}
257	}
258
259	fn new_regex(pat: &str) -> Result<Regex, Error> {
260	RegexBuilder::new(pat)
261	.dot_matches_new_line(`true`)
262	.size_limit(`10` * (`1` << `20`))
263	.dfa_size_limit(`10` * (`1` << `20`))
264	.build()
265	.map_err(\|err: Error\| Error {
266	glob: Some(pat.to_string()),
267	kind: ErrorKind::Regex(err.to_string()),
268	})
269	}
270
271	fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
272	where
273	S: AsRef<str>,
274	I: IntoIterator<Item = S>,
275	{
276	RegexSet::new(pats).map_err(\|err: Error\| Error {
277	glob: None,
278	kind: ErrorKind::Regex(err.to_string()),
279	})
280	}
281
/// Hasher builder producing `fnv::FnvHasher` instances; used for the
/// extension-keyed hash maps in this file.
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
283
284	/// GlobSet represents a group of globs that can be matched together in a
285	/// single pass.
286	#[derive(Clone, Debug)]
287	pub struct GlobSet {
288	len: usize,
289	strats: Vec<GlobSetMatchStrategy>,
290	}
291
292	impl GlobSet {
293	/// Create an empty `GlobSet`. An empty set matches nothing.
294	#[inline]
295	pub fn empty() -> GlobSet {
296	GlobSet { len: `0`, strats: vec![] }
297	}
298
299	/// Returns true if this set is empty, and therefore matches nothing.
300	#[inline]
301	pub fn is_empty(&self) -> bool {
302	self.len == `0`
303	}
304
305	/// Returns the number of globs in this set.
306	#[inline]
307	pub fn len(&self) -> usize {
308	self.len
309	}
310
311	/// Returns true if any glob in this set matches the path given.
312	pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
313	self.is_match_candidate(&Candidate::new(path.as_ref()))
314	}
315
316	/// Returns true if any glob in this set matches the path given.
317	///
318	/// This takes a Candidate as input, which can be used to amortize the
319	/// cost of preparing a path for matching.
320	pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
321	if self.is_empty() {
322	return `false`;
323	}
324	for strat in &self.strats {
325	if strat.is_match(path) {
326	return `true`;
327	}
328	}
329	`false`
330	}
331
332	/// Returns the sequence number of every glob pattern that matches the
333	/// given path.
334	pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
335	self.matches_candidate(&Candidate::new(path.as_ref()))
336	}
337
338	/// Returns the sequence number of every glob pattern that matches the
339	/// given path.
340	///
341	/// This takes a Candidate as input, which can be used to amortize the
342	/// cost of preparing a path for matching.
343	pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
344	let mut into = vec![];
345	if self.is_empty() {
346	return into;
347	}
348	self.matches_candidate_into(path, &mut into);
349	into
350	}
351
352	/// Adds the sequence number of every glob pattern that matches the given
353	/// path to the vec given.
354	///
355	/// `into` is cleared before matching begins, and contains the set of
356	/// sequence numbers (in ascending order) after matching ends. If no globs
357	/// were matched, then `into` will be empty.
358	pub fn matches_into<P: AsRef<Path>>(
359	&self,
360	path: P,
361	into: &mut Vec<usize>,
362	) {
363	self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
364	}
365
366	/// Adds the sequence number of every glob pattern that matches the given
367	/// path to the vec given.
368	///
369	/// `into` is cleared before matching begins, and contains the set of
370	/// sequence numbers (in ascending order) after matching ends. If no globs
371	/// were matched, then `into` will be empty.
372	///
373	/// This takes a Candidate as input, which can be used to amortize the
374	/// cost of preparing a path for matching.
375	pub fn matches_candidate_into(
376	&self,
377	path: &Candidate<'_>,
378	into: &mut Vec<usize>,
379	) {
380	into.clear();
381	if self.is_empty() {
382	return;
383	}
384	for strat in &self.strats {
385	strat.matches_into(path, into);
386	}
387	into.sort();
388	into.dedup();
389	}
390
391	fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
392	if pats.is_empty() {
393	return Ok(GlobSet { len: `0`, strats: vec![] });
394	}
395	let mut lits = LiteralStrategy::new();
396	let mut base_lits = BasenameLiteralStrategy::new();
397	let mut exts = ExtensionStrategy::new();
398	let mut prefixes = MultiStrategyBuilder::new();
399	let mut suffixes = MultiStrategyBuilder::new();
400	let mut required_exts = RequiredExtensionStrategyBuilder::new();
401	let mut regexes = MultiStrategyBuilder::new();
402	for (i, p) in pats.iter().enumerate() {
403	match MatchStrategy::new(p) {
404	MatchStrategy::Literal(lit) => {
405	lits.add(i, lit);
406	}
407	MatchStrategy::BasenameLiteral(lit) => {
408	base_lits.add(i, lit);
409	}
410	MatchStrategy::Extension(ext) => {
411	exts.add(i, ext);
412	}
413	MatchStrategy::Prefix(prefix) => {
414	prefixes.add(i, prefix);
415	}
416	MatchStrategy::Suffix { suffix, component } => {
417	if component {
418	lits.add(i, suffix[`1`..].to_string());
419	}
420	suffixes.add(i, suffix);
421	}
422	MatchStrategy::RequiredExtension(ext) => {
423	required_exts.add(i, ext, p.regex().to_owned());
424	}
425	MatchStrategy::Regex => {
426	debug!("glob converted to regex: {:?}", p);
427	regexes.add(i, p.regex().to_owned());
428	}
429	}
430	}
431	debug!(
432	"built glob set; {} literals, {} basenames, {} extensions, \
433	{} prefixes, {} suffixes, {} required extensions, {} regexes",
434	lits.0.len(),
435	base_lits.0.len(),
436	exts.0.len(),
437	prefixes.literals.len(),
438	suffixes.literals.len(),
439	required_exts.0.len(),
440	regexes.literals.len()
441	);
442	Ok(GlobSet {
443	len: pats.len(),
444	strats: vec![
445	GlobSetMatchStrategy::Extension(exts),
446	GlobSetMatchStrategy::BasenameLiteral(base_lits),
447	GlobSetMatchStrategy::Literal(lits),
448	GlobSetMatchStrategy::Suffix(suffixes.suffix()),
449	GlobSetMatchStrategy::Prefix(prefixes.prefix()),
450	GlobSetMatchStrategy::RequiredExtension(
451	required_exts.build()?,
452	),
453	GlobSetMatchStrategy::Regex(regexes.regex_set()?),
454	],
455	})
456	}
457	}
458
459	impl Default for GlobSet {
460	/// Create a default empty GlobSet.
461	fn default() -> Self {
462	GlobSet::empty()
463	}
464	}
465
466	/// GlobSetBuilder builds a group of patterns that can be used to
467	/// simultaneously match a file path.
468	#[derive(Clone, Debug)]
469	pub struct GlobSetBuilder {
470	pats: Vec<Glob>,
471	}
472
473	impl GlobSetBuilder {
474	/// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
475	/// patterns. Once all patterns have been added, `build` should be called
476	/// to produce a `GlobSet`, which can then be used for matching.
477	pub fn new() -> GlobSetBuilder {
478	GlobSetBuilder { pats: vec![] }
479	}
480
481	/// Builds a new matcher from all of the glob patterns added so far.
482	///
483	/// Once a matcher is built, no new patterns can be added to it.
484	pub fn build(&self) -> Result<GlobSet, Error> {
485	GlobSet::new(&self.pats)
486	}
487
488	/// Add a new pattern to this set.
489	pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
490	self.pats.push(pat);
491	self
492	}
493	}
494
495	/// A candidate path for matching.
496	///
497	/// All glob matching in this crate operates on `Candidate` values.
498	/// Constructing candidates has a very small cost associated with it, so
499	/// callers may find it beneficial to amortize that cost when matching a single
500	/// path against multiple globs or sets of globs.
501	#[derive(Clone, Debug)]
502	pub struct Candidate<'a> {
503	path: Cow<'a, [u8]>,
504	basename: Cow<'a, [u8]>,
505	ext: Cow<'a, [u8]>,
506	}
507
508	impl<'a> Candidate<'a> {
509	/// Create a new candidate for matching from the given path.
510	pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
511	let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
512	let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
513	let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
514	Candidate { path: path, basename: basename, ext: ext }
515	}
516
517	fn path_prefix(&self, max: usize) -> &[u8] {
518	if self.path.len() <= max {
519	&*self.path
520	} else {
521	&self.path[..max]
522	}
523	}
524
525	fn path_suffix(&self, max: usize) -> &[u8] {
526	if self.path.len() <= max {
527	&*self.path
528	} else {
529	&self.path[self.path.len() - max..]
530	}
531	}
532	}
533
534	#[derive(Clone, Debug)]
535	enum GlobSetMatchStrategy {
536	Literal(LiteralStrategy),
537	BasenameLiteral(BasenameLiteralStrategy),
538	Extension(ExtensionStrategy),
539	Prefix(PrefixStrategy),
540	Suffix(SuffixStrategy),
541	RequiredExtension(RequiredExtensionStrategy),
542	Regex(RegexSetStrategy),
543	}
544
545	impl GlobSetMatchStrategy {
546	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
547	use self::GlobSetMatchStrategy::*;
548	match *self {
549	Literal(ref s) => s.is_match(candidate),
550	BasenameLiteral(ref s) => s.is_match(candidate),
551	Extension(ref s) => s.is_match(candidate),
552	Prefix(ref s) => s.is_match(candidate),
553	Suffix(ref s) => s.is_match(candidate),
554	RequiredExtension(ref s) => s.is_match(candidate),
555	Regex(ref s) => s.is_match(candidate),
556	}
557	}
558
559	fn matches_into(
560	&self,
561	candidate: &Candidate<'_>,
562	matches: &mut Vec<usize>,
563	) {
564	use self::GlobSetMatchStrategy::*;
565	match *self {
566	Literal(ref s) => s.matches_into(candidate, matches),
567	BasenameLiteral(ref s) => s.matches_into(candidate, matches),
568	Extension(ref s) => s.matches_into(candidate, matches),
569	Prefix(ref s) => s.matches_into(candidate, matches),
570	Suffix(ref s) => s.matches_into(candidate, matches),
571	RequiredExtension(ref s) => s.matches_into(candidate, matches),
572	Regex(ref s) => s.matches_into(candidate, matches),
573	}
574	}
575	}
576
577	#[derive(Clone, Debug)]
578	struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
579
580	impl LiteralStrategy {
581	fn new() -> LiteralStrategy {
582	LiteralStrategy(BTreeMap::new())
583	}
584
585	fn add(&mut self, global_index: usize, lit: String) {
586	self.0.entry(lit.into_bytes()).or_insert(default:vec![]).push(global_index);
587	}
588
589	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
590	self.0.contains_key(candidate.path.as_bytes())
591	}
592
593	#[inline(never)]
594	fn matches_into(
595	&self,
596	candidate: &Candidate<'_>,
597	matches: &mut Vec<usize>,
598	) {
599	if let Some(hits: &Vec) = self.0.get(key:candidate.path.as_bytes()) {
600	matches.extend(iter:hits);
601	}
602	}
603	}
604
605	#[derive(Clone, Debug)]
606	struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
607
608	impl BasenameLiteralStrategy {
609	fn new() -> BasenameLiteralStrategy {
610	BasenameLiteralStrategy(BTreeMap::new())
611	}
612
613	fn add(&mut self, global_index: usize, lit: String) {
614	self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
615	}
616
617	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
618	if candidate.basename.is_empty() {
619	return `false`;
620	}
621	self.0.contains_key(candidate.basename.as_bytes())
622	}
623
624	#[inline(never)]
625	fn matches_into(
626	&self,
627	candidate: &Candidate<'_>,
628	matches: &mut Vec<usize>,
629	) {
630	if candidate.basename.is_empty() {
631	return;
632	}
633	if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
634	matches.extend(hits);
635	}
636	}
637	}
638
639	#[derive(Clone, Debug)]
640	struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
641
642	impl ExtensionStrategy {
643	fn new() -> ExtensionStrategy {
644	ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
645	}
646
647	fn add(&mut self, global_index: usize, ext: String) {
648	self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
649	}
650
651	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
652	if candidate.ext.is_empty() {
653	return `false`;
654	}
655	self.0.contains_key(candidate.ext.as_bytes())
656	}
657
658	#[inline(never)]
659	fn matches_into(
660	&self,
661	candidate: &Candidate<'_>,
662	matches: &mut Vec<usize>,
663	) {
664	if candidate.ext.is_empty() {
665	return;
666	}
667	if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
668	matches.extend(hits);
669	}
670	}
671	}
672
673	#[derive(Clone, Debug)]
674	struct PrefixStrategy {
675	matcher: AhoCorasick,
676	map: Vec<usize>,
677	longest: usize,
678	}
679
680	impl PrefixStrategy {
681	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
682	let path: &[u8] = candidate.path_prefix(self.longest);
683	for m: Match in self.matcher.find_overlapping_iter(haystack:path) {
684	if m.start() == `0` {
685	return `true`;
686	}
687	}
688	`false`
689	}
690
691	fn matches_into(
692	&self,
693	candidate: &Candidate<'_>,
694	matches: &mut Vec<usize>,
695	) {
696	let path: &[u8] = candidate.path_prefix(self.longest);
697	for m: Match in self.matcher.find_overlapping_iter(haystack:path) {
698	if m.start() == `0` {
699	matches.push(self.map[m.pattern()]);
700	}
701	}
702	}
703	}
704
705	#[derive(Clone, Debug)]
706	struct SuffixStrategy {
707	matcher: AhoCorasick,
708	map: Vec<usize>,
709	longest: usize,
710	}
711
712	impl SuffixStrategy {
713	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
714	let path: &[u8] = candidate.path_suffix(self.longest);
715	for m: Match in self.matcher.find_overlapping_iter(haystack:path) {
716	if m.end() == path.len() {
717	return `true`;
718	}
719	}
720	`false`
721	}
722
723	fn matches_into(
724	&self,
725	candidate: &Candidate<'_>,
726	matches: &mut Vec<usize>,
727	) {
728	let path: &[u8] = candidate.path_suffix(self.longest);
729	for m: Match in self.matcher.find_overlapping_iter(haystack:path) {
730	if m.end() == path.len() {
731	matches.push(self.map[m.pattern()]);
732	}
733	}
734	}
735	}
736
737	#[derive(Clone, Debug)]
738	struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
739
740	impl RequiredExtensionStrategy {
741	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
742	if candidate.ext.is_empty() {
743	return `false`;
744	}
745	match self.0.get(candidate.ext.as_bytes()) {
746	None => `false`,
747	Some(regexes) => {
748	for &(_, ref re) in regexes {
749	if re.is_match(candidate.path.as_bytes()) {
750	return `true`;
751	}
752	}
753	`false`
754	}
755	}
756	}
757
758	#[inline(never)]
759	fn matches_into(
760	&self,
761	candidate: &Candidate<'_>,
762	matches: &mut Vec<usize>,
763	) {
764	if candidate.ext.is_empty() {
765	return;
766	}
767	if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
768	for &(global_index, ref re) in regexes {
769	if re.is_match(candidate.path.as_bytes()) {
770	matches.push(global_index);
771	}
772	}
773	}
774	}
775	}
776
777	#[derive(Clone, Debug)]
778	struct RegexSetStrategy {
779	matcher: RegexSet,
780	map: Vec<usize>,
781	}
782
783	impl RegexSetStrategy {
784	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
785	self.matcher.is_match(text:candidate.path.as_bytes())
786	}
787
788	fn matches_into(
789	&self,
790	candidate: &Candidate<'_>,
791	matches: &mut Vec<usize>,
792	) {
793	for i: usize in self.matcher.matches(text:candidate.path.as_bytes()) {
794	matches.push(self.map[i]);
795	}
796	}
797	}
798
799	#[derive(Clone, Debug)]
800	struct MultiStrategyBuilder {
801	literals: Vec<String>,
802	map: Vec<usize>,
803	longest: usize,
804	}
805
806	impl MultiStrategyBuilder {
807	fn new() -> MultiStrategyBuilder {
808	MultiStrategyBuilder { literals: vec![], map: vec![], longest: `0` }
809	}
810
811	fn add(&mut self, global_index: usize, literal: String) {
812	if literal.len() > self.longest {
813	self.longest = literal.len();
814	}
815	self.map.push(global_index);
816	self.literals.push(literal);
817	}
818
819	fn prefix(self) -> PrefixStrategy {
820	PrefixStrategy {
821	matcher: AhoCorasick::new_auto_configured(&self.literals),
822	map: self.map,
823	longest: self.longest,
824	}
825	}
826
827	fn suffix(self) -> SuffixStrategy {
828	SuffixStrategy {
829	matcher: AhoCorasick::new_auto_configured(&self.literals),
830	map: self.map,
831	longest: self.longest,
832	}
833	}
834
835	fn regex_set(self) -> Result<RegexSetStrategy, Error> {
836	Ok(RegexSetStrategy {
837	matcher: new_regex_set(self.literals)?,
838	map: self.map,
839	})
840	}
841	}
842
843	#[derive(Clone, Debug)]
844	struct RequiredExtensionStrategyBuilder(
845	HashMap<Vec<u8>, Vec<(usize, String)>>,
846	);
847
848	impl RequiredExtensionStrategyBuilder {
849	fn new() -> RequiredExtensionStrategyBuilder {
850	RequiredExtensionStrategyBuilder(HashMap::new())
851	}
852
853	fn add(&mut self, global_index: usize, ext: String, regex: String) {
854	self.0
855	.entry(ext.into_bytes())
856	.or_insert(default:vec![])
857	.push((global_index, regex));
858	}
859
860	fn build(self) -> Result<RequiredExtensionStrategy, Error> {
861	let mut exts: HashMap, Vec<(usize, …)>, …> = HashMap::with_hasher(hash_builder:Fnv::default());
862	for (ext: Vec, regexes: Vec<(usize, String)>) in self.0.into_iter() {
863	exts.insert(k:ext.clone(), v:vec![]);
864	for (global_index: usize, regex: String) in regexes {
865	let compiled: Regex = new_regex(&regex)?;
866	exts.get_mut(&ext).unwrap().push((global_index, compiled));
867	}
868	}
869	Ok(RequiredExtensionStrategy(exts))
870	}
871	}
872
#[cfg(test)]
mod tests {
    use super::{GlobSet, GlobSetBuilder};
    use crate::glob::Glob;

    /// A set built from several globs reports whether any glob matches, and
    /// which ones do.
    #[test]
    fn set_works() {
        let mut builder = GlobSetBuilder::new();
        builder.add(Glob::new("src/**/*.rs").unwrap());
        builder.add(Glob::new("*.c").unwrap());
        builder.add(Glob::new("src/lib.rs").unwrap());
        let set = builder.build().unwrap();

        assert!(set.is_match("foo.c"));
        assert!(set.is_match("src/foo.c"));
        assert!(!set.is_match("foo.rs"));
        assert!(!set.is_match("tests/foo.rs"));
        assert!(set.is_match("src/foo.rs"));
        assert!(set.is_match("src/grep/src/main.rs"));

        // `src/lib.rs` is matched by the first and the third glob.
        let matches = set.matches("src/lib.rs");
        assert_eq!(2, matches.len());
        assert_eq!(0, matches[0]);
        assert_eq!(2, matches[1]);
    }

    /// A set built from no globs matches nothing, not even the empty path.
    #[test]
    fn empty_set_works() {
        let set = GlobSetBuilder::new().build().unwrap();
        assert!(!set.is_match(""));
        assert!(!set.is_match("a"));
    }

    /// The `Default` set behaves like the empty set.
    #[test]
    fn default_set_is_empty_works() {
        let set: GlobSet = Default::default();
        assert!(!set.is_match(""));
        assert!(!set.is_match("a"));
    }
}
913