lib.rs source code [crates/globset/src/lib.rs]

/*!
The globset crate provides cross platform single glob and glob set matching.

Glob set matching is the process of matching one or more glob patterns against
a single candidate path simultaneously, and returning all of the globs that
matched. For example, given this set of globs:

* `*.rs`
* `src/lib.rs`
* `src/**/foo.rs`

and a path `src/bar/baz/foo.rs`, then the set would report the first and third
globs as matching.

# Example: one glob

This example shows how to match a single glob against a single file path.

```
use globset::Glob;

let glob = Glob::new("*.rs")?.compile_matcher();

assert!(glob.is_match("foo.rs"));
assert!(glob.is_match("foo/bar.rs"));
assert!(!glob.is_match("Cargo.toml"));
# Ok::<(), Box<dyn std::error::Error>>(())
```

# Example: configuring a glob matcher

This example shows how to use a `GlobBuilder` to configure aspects of match
semantics. In this example, we prevent wildcards from matching path separators.

```
use globset::GlobBuilder;

let glob = GlobBuilder::new("*.rs")
    .literal_separator(true).build()?.compile_matcher();

assert!(glob.is_match("foo.rs"));
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
assert!(!glob.is_match("Cargo.toml"));
# Ok::<(), Box<dyn std::error::Error>>(())
```

# Example: match multiple globs at once

This example shows how to match multiple glob patterns at once.

```
use globset::{Glob, GlobSetBuilder};

let mut builder = GlobSetBuilder::new();
// A GlobBuilder can be used to configure each glob's match semantics
// independently.
builder.add(Glob::new("*.rs")?);
builder.add(Glob::new("src/lib.rs")?);
builder.add(Glob::new("src/**/foo.rs")?);
let set = builder.build()?;

assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
# Ok::<(), Box<dyn std::error::Error>>(())
```

# Syntax

Standard Unix-style glob syntax is supported:

* `?` matches any single character. (If the `literal_separator` option is
  enabled, then `?` can never match a path separator.)
* `*` matches zero or more characters. (If the `literal_separator` option is
  enabled, then `*` can never match a path separator.)
* `**` recursively matches directories but is only legal in three situations.
  First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
  all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
  and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
  <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
  <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
  Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
  the pattern, then it matches zero or more directories. Using `**` anywhere
  else is illegal (N.B. the glob `**` is allowed and means "match everything").
* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
  (N.B. Nesting `{...}` is not currently allowed.)
* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
  `[!ab]` to match any character except for `a` and `b`.
* Metacharacters such as `*` and `?` can be escaped with character class
  notation. e.g., `[*]` matches `*`.
* When backslash escapes are enabled, a backslash (`\`) will escape all meta
  characters in a glob. If it precedes a non-meta character, then the slash is
  ignored. A `\\` will match a literal `\\`. Note that this mode is only
  enabled on Unix platforms by default, but can be enabled on any platform
  via the `backslash_escape` setting on `Glob`.

A `GlobBuilder` can be used to prevent wildcards from matching path separators,
or to enable case insensitive matching.
*/
98
99	#![deny(missing_docs)]
100
101	use std::{
102	borrow::Cow,
103	panic::{RefUnwindSafe, UnwindSafe},
104	path::Path,
105	sync::Arc,
106	};
107
108	use {
109	aho_corasick::AhoCorasick,
110	bstr::{ByteSlice, ByteVec, B},
111	regex_automata::{
112	meta::Regex,
113	util::pool::{Pool, PoolGuard},
114	PatternSet,
115	},
116	};
117
118	use crate::{
119	glob::MatchStrategy,
120	pathutil::{file_name, file_name_ext, normalize_path},
121	};
122
123	pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
124
125	mod fnv;
126	mod glob;
127	mod pathutil;
128
129	#[cfg(feature = "serde1")]
130	mod serde_impl;
131
// Internal logging shim. With the `log` feature enabled, every token passed
// to `debug!` is forwarded verbatim to `log::debug!`. The `*` repetition
// operators are required so that the macro accepts an arbitrary number of
// tokens (format string plus arguments) rather than exactly one.
#[cfg(feature = "log")]
macro_rules! debug {
    ($($token:tt)*) => (::log::debug!($($token)*);)
}

// Without the `log` feature, `debug!` accepts the same input and expands to
// nothing, so call sites need no `cfg` guards of their own.
#[cfg(not(feature = "log"))]
macro_rules! debug {
    ($($token:tt)*) => {};
}
141
142	/// Represents an error that can occur when parsing a glob pattern.
143	#[derive(Clone, Debug, Eq, PartialEq)]
144	pub struct Error {
145	/// The original glob provided by the caller.
146	glob: Option<String>,
147	/// The kind of error.
148	kind: ErrorKind,
149	}
150
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// **DEPRECATED**.
    ///
    /// This error used to occur for consistency with git's glob specification,
    /// but the specification now accepts all uses of `**`. When `**` does not
    /// appear adjacent to a path separator or at the beginning/end of a glob,
    /// it is now treated as two consecutive `*` patterns. As such, this error
    /// is no longer used.
    InvalidRecursive,
    /// Occurs when a character class (e.g., `[abc]`) is not closed.
    UnclosedClass,
    /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For
    /// example, if the range starts with a lexicographically larger character
    /// than it ends with.
    InvalidRange(char, char),
    /// Occurs when a `}` is found without a matching `{`.
    UnopenedAlternates,
    /// Occurs when a `{` is found without a matching `}`.
    UnclosedAlternates,
    /// Occurs when an alternating group is nested inside another alternating
    /// group, e.g., `{{a,b},{c,d}}`.
    NestedAlternates,
    /// Occurs when an unescaped '\' is found at the end of a glob.
    DanglingEscape,
    /// An error associated with parsing or compiling a regex.
    Regex(String),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// This enum may grow additional variants, so this makes sure clients
    /// don't count on exhaustive matching. (Otherwise, adding a new variant
    /// could break existing code.)
    #[doc(hidden)]
    __Nonexhaustive,
}
187
188	impl std::error::Error for Error {
189	fn description(&self) -> &str {
190	self.kind.description()
191	}
192	}
193
194	impl Error {
195	/// Return the glob that caused this error, if one exists.
196	pub fn glob(&self) -> Option<&str> {
197	self.glob.as_ref().map(\|s: &String\| &**s)
198	}
199
200	/// Return the kind of this error.
201	pub fn kind(&self) -> &ErrorKind {
202	&self.kind
203	}
204	}
205
206	impl ErrorKind {
207	fn description(&self) -> &str {
208	match *self {
209	ErrorKind::InvalidRecursive => {
210	"invalid use of **; must be one path component"
211	}
212	ErrorKind::UnclosedClass => {
213	"unclosed character class; missing ']'"
214	}
215	ErrorKind::InvalidRange(_, _) => "invalid character range",
216	ErrorKind::UnopenedAlternates => {
217	"unopened alternate group; missing '{' \
218	(maybe escape '}' with '[}]'?)"
219	}
220	ErrorKind::UnclosedAlternates => {
221	"unclosed alternate group; missing '}' \
222	(maybe escape '{' with '[{]'?)"
223	}
224	ErrorKind::NestedAlternates => {
225	"nested alternate groups are not allowed"
226	}
227	ErrorKind::DanglingEscape => "dangling '`\\`'",
228	ErrorKind::Regex(ref err) => err,
229	ErrorKind::__Nonexhaustive => unreachable!(),
230	}
231	}
232	}
233
234	impl std::fmt::Display for Error {
235	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
236	match self.glob {
237	None => self.kind.fmt(f),
238	Some(ref glob: &String) => {
239	write!(f, "error parsing glob '{}': {}", glob, self.kind)
240	}
241	}
242	}
243	}
244
245	impl std::fmt::Display for ErrorKind {
246	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
247	match *self {
248	ErrorKind::InvalidRecursive
249	\| ErrorKind::UnclosedClass
250	\| ErrorKind::UnopenedAlternates
251	\| ErrorKind::UnclosedAlternates
252	\| ErrorKind::NestedAlternates
253	\| ErrorKind::DanglingEscape
254	\| ErrorKind::Regex(_) => write!(f, "{}", self.description()),
255	ErrorKind::InvalidRange(s: char, e: char) => {
256	write!(f, "invalid range; '{}' > '{}'", s, e)
257	}
258	ErrorKind::__Nonexhaustive => unreachable!(),
259	}
260	}
261	}
262
263	fn new_regex(pat: &str) -> Result<Regex, Error> {
264	let syntax: Config = regex_automata::util::syntax::Config::new()
265	.utf8(`false`)
266	.dot_matches_new_line(yes:`true`);
267	let config: Config = Regex::config()
268	.utf8_empty(`false`)
269	.nfa_size_limit(Some(`10` * (`1` << `20`)))
270	.hybrid_cache_capacity(limit:`10` * (`1` << `20`));
271	Regex::builder().syntax(syntax).configure(config).build(pat).map_err(
272	\|err: BuildError\| Error {
273	glob: Some(pat.to_string()),
274	kind: ErrorKind::Regex(err.to_string()),
275	},
276	)
277	}
278
279	fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> {
280	let syntax: Config = regex_automata::util::syntax::Config::new()
281	.utf8(`false`)
282	.dot_matches_new_line(yes:`true`);
283	let config: Config = Regex::config()
284	.match_kind(regex_automata::MatchKind::All)
285	.utf8_empty(`false`)
286	.nfa_size_limit(Some(`10` * (`1` << `20`)))
287	.hybrid_cache_capacity(limit:`10` * (`1` << `20`));
288	Regex::builder()
289	.syntax(syntax)
290	.configure(config)
291	.build_many(&pats)
292	.map_err(\|err: BuildError\| Error {
293	glob: None,
294	kind: ErrorKind::Regex(err.to_string()),
295	})
296	}
297
298	/// GlobSet represents a group of globs that can be matched together in a
299	/// single pass.
300	#[derive(Clone, Debug)]
301	pub struct GlobSet {
302	len: usize,
303	strats: Vec<GlobSetMatchStrategy>,
304	}
305
306	impl GlobSet {
307	/// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add
308	/// new patterns. Once all patterns have been added, `build` should be
309	/// called to produce a `GlobSet`, which can then be used for matching.
310	#[inline]
311	pub fn builder() -> GlobSetBuilder {
312	GlobSetBuilder::new()
313	}
314
315	/// Create an empty `GlobSet`. An empty set matches nothing.
316	#[inline]
317	pub fn empty() -> GlobSet {
318	GlobSet { len: `0`, strats: vec![] }
319	}
320
321	/// Returns true if this set is empty, and therefore matches nothing.
322	#[inline]
323	pub fn is_empty(&self) -> bool {
324	self.len == `0`
325	}
326
327	/// Returns the number of globs in this set.
328	#[inline]
329	pub fn len(&self) -> usize {
330	self.len
331	}
332
333	/// Returns true if any glob in this set matches the path given.
334	pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
335	self.is_match_candidate(&Candidate::new(path.as_ref()))
336	}
337
338	/// Returns true if any glob in this set matches the path given.
339	///
340	/// This takes a Candidate as input, which can be used to amortize the
341	/// cost of preparing a path for matching.
342	pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
343	if self.is_empty() {
344	return `false`;
345	}
346	for strat in &self.strats {
347	if strat.is_match(path) {
348	return `true`;
349	}
350	}
351	`false`
352	}
353
354	/// Returns the sequence number of every glob pattern that matches the
355	/// given path.
356	pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
357	self.matches_candidate(&Candidate::new(path.as_ref()))
358	}
359
360	/// Returns the sequence number of every glob pattern that matches the
361	/// given path.
362	///
363	/// This takes a Candidate as input, which can be used to amortize the
364	/// cost of preparing a path for matching.
365	pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
366	let mut into = vec![];
367	if self.is_empty() {
368	return into;
369	}
370	self.matches_candidate_into(path, &mut into);
371	into
372	}
373
374	/// Adds the sequence number of every glob pattern that matches the given
375	/// path to the vec given.
376	///
377	/// `into` is cleared before matching begins, and contains the set of
378	/// sequence numbers (in ascending order) after matching ends. If no globs
379	/// were matched, then `into` will be empty.
380	pub fn matches_into<P: AsRef<Path>>(
381	&self,
382	path: P,
383	into: &mut Vec<usize>,
384	) {
385	self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
386	}
387
388	/// Adds the sequence number of every glob pattern that matches the given
389	/// path to the vec given.
390	///
391	/// `into` is cleared before matching begins, and contains the set of
392	/// sequence numbers (in ascending order) after matching ends. If no globs
393	/// were matched, then `into` will be empty.
394	///
395	/// This takes a Candidate as input, which can be used to amortize the
396	/// cost of preparing a path for matching.
397	pub fn matches_candidate_into(
398	&self,
399	path: &Candidate<'_>,
400	into: &mut Vec<usize>,
401	) {
402	into.clear();
403	if self.is_empty() {
404	return;
405	}
406	for strat in &self.strats {
407	strat.matches_into(path, into);
408	}
409	into.sort();
410	into.dedup();
411	}
412
413	fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
414	if pats.is_empty() {
415	return Ok(GlobSet { len: `0`, strats: vec![] });
416	}
417	let mut lits = LiteralStrategy::new();
418	let mut base_lits = BasenameLiteralStrategy::new();
419	let mut exts = ExtensionStrategy::new();
420	let mut prefixes = MultiStrategyBuilder::new();
421	let mut suffixes = MultiStrategyBuilder::new();
422	let mut required_exts = RequiredExtensionStrategyBuilder::new();
423	let mut regexes = MultiStrategyBuilder::new();
424	for (i, p) in pats.iter().enumerate() {
425	match MatchStrategy::new(p) {
426	MatchStrategy::Literal(lit) => {
427	lits.add(i, lit);
428	}
429	MatchStrategy::BasenameLiteral(lit) => {
430	base_lits.add(i, lit);
431	}
432	MatchStrategy::Extension(ext) => {
433	exts.add(i, ext);
434	}
435	MatchStrategy::Prefix(prefix) => {
436	prefixes.add(i, prefix);
437	}
438	MatchStrategy::Suffix { suffix, component } => {
439	if component {
440	lits.add(i, suffix[`1`..].to_string());
441	}
442	suffixes.add(i, suffix);
443	}
444	MatchStrategy::RequiredExtension(ext) => {
445	required_exts.add(i, ext, p.regex().to_owned());
446	}
447	MatchStrategy::Regex => {
448	debug!("glob converted to regex: {:?}", p);
449	regexes.add(i, p.regex().to_owned());
450	}
451	}
452	}
453	debug!(
454	"built glob set; {} literals, {} basenames, {} extensions, \
455	{} prefixes, {} suffixes, {} required extensions, {} regexes",
456	lits.0.len(),
457	base_lits.0.len(),
458	exts.0.len(),
459	prefixes.literals.len(),
460	suffixes.literals.len(),
461	required_exts.0.len(),
462	regexes.literals.len()
463	);
464	Ok(GlobSet {
465	len: pats.len(),
466	strats: vec![
467	GlobSetMatchStrategy::Extension(exts),
468	GlobSetMatchStrategy::BasenameLiteral(base_lits),
469	GlobSetMatchStrategy::Literal(lits),
470	GlobSetMatchStrategy::Suffix(suffixes.suffix()),
471	GlobSetMatchStrategy::Prefix(prefixes.prefix()),
472	GlobSetMatchStrategy::RequiredExtension(
473	required_exts.build()?,
474	),
475	GlobSetMatchStrategy::Regex(regexes.regex_set()?),
476	],
477	})
478	}
479	}
480
481	impl Default for GlobSet {
482	/// Create a default empty GlobSet.
483	fn default() -> Self {
484	GlobSet::empty()
485	}
486	}
487
488	/// GlobSetBuilder builds a group of patterns that can be used to
489	/// simultaneously match a file path.
490	#[derive(Clone, Debug)]
491	pub struct GlobSetBuilder {
492	pats: Vec<Glob>,
493	}
494
495	impl GlobSetBuilder {
496	/// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new
497	/// patterns. Once all patterns have been added, `build` should be called
498	/// to produce a [`GlobSet`], which can then be used for matching.
499	pub fn new() -> GlobSetBuilder {
500	GlobSetBuilder { pats: vec![] }
501	}
502
503	/// Builds a new matcher from all of the glob patterns added so far.
504	///
505	/// Once a matcher is built, no new patterns can be added to it.
506	pub fn build(&self) -> Result<GlobSet, Error> {
507	GlobSet::new(&self.pats)
508	}
509
510	/// Add a new pattern to this set.
511	pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
512	self.pats.push(pat);
513	self
514	}
515	}
516
/// A path that has been prepared for glob matching.
///
/// Every matching routine in this crate works on a `Candidate`. Building one
/// has a very small cost, so callers matching the same path against several
/// globs or glob sets may want to construct it once and reuse it.
#[derive(Clone)]
pub struct Candidate<'a> {
    /// The normalized path bytes.
    path: Cow<'a, [u8]>,
    /// The file name portion of `path`, or empty if there is none.
    basename: Cow<'a, [u8]>,
    /// The extension derived from `basename`, or empty if there is none.
    ext: Cow<'a, [u8]>,
}
529
530	impl<'a> std::fmt::Debug for Candidate<'a> {
531	fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
532	f&mut DebugStruct<'_, '_>.debug_struct("Candidate")
533	.field("path", &self.path.as_bstr())
534	.field("basename", &self.basename.as_bstr())
535	.field(name:"ext", &self.ext.as_bstr())
536	.finish()
537	}
538	}
539
540	impl<'a> Candidate<'a> {
541	/// Create a new candidate for matching from the given path.
542	pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
543	let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
544	let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
545	let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
546	Candidate { path, basename, ext }
547	}
548
549	fn path_prefix(&self, max: usize) -> &[u8] {
550	if self.path.len() <= max {
551	&*self.path
552	} else {
553	&self.path[..max]
554	}
555	}
556
557	fn path_suffix(&self, max: usize) -> &[u8] {
558	if self.path.len() <= max {
559	&*self.path
560	} else {
561	&self.path[self.path.len() - max..]
562	}
563	}
564	}
565
566	#[derive(Clone, Debug)]
567	enum GlobSetMatchStrategy {
568	Literal(LiteralStrategy),
569	BasenameLiteral(BasenameLiteralStrategy),
570	Extension(ExtensionStrategy),
571	Prefix(PrefixStrategy),
572	Suffix(SuffixStrategy),
573	RequiredExtension(RequiredExtensionStrategy),
574	Regex(RegexSetStrategy),
575	}
576
577	impl GlobSetMatchStrategy {
578	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
579	use self::GlobSetMatchStrategy::*;
580	match *self {
581	Literal(ref s) => s.is_match(candidate),
582	BasenameLiteral(ref s) => s.is_match(candidate),
583	Extension(ref s) => s.is_match(candidate),
584	Prefix(ref s) => s.is_match(candidate),
585	Suffix(ref s) => s.is_match(candidate),
586	RequiredExtension(ref s) => s.is_match(candidate),
587	Regex(ref s) => s.is_match(candidate),
588	}
589	}
590
591	fn matches_into(
592	&self,
593	candidate: &Candidate<'_>,
594	matches: &mut Vec<usize>,
595	) {
596	use self::GlobSetMatchStrategy::*;
597	match *self {
598	Literal(ref s) => s.matches_into(candidate, matches),
599	BasenameLiteral(ref s) => s.matches_into(candidate, matches),
600	Extension(ref s) => s.matches_into(candidate, matches),
601	Prefix(ref s) => s.matches_into(candidate, matches),
602	Suffix(ref s) => s.matches_into(candidate, matches),
603	RequiredExtension(ref s) => s.matches_into(candidate, matches),
604	Regex(ref s) => s.matches_into(candidate, matches),
605	}
606	}
607	}
608
609	#[derive(Clone, Debug)]
610	struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
611
612	impl LiteralStrategy {
613	fn new() -> LiteralStrategy {
614	LiteralStrategy(fnv::HashMap::default())
615	}
616
617	fn add(&mut self, global_index: usize, lit: String) {
618	self.0.entry(lit.into_bytes()).or_insert(default:vec![]).push(global_index);
619	}
620
621	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
622	self.0.contains_key(candidate.path.as_bytes())
623	}
624
625	#[inline(never)]
626	fn matches_into(
627	&self,
628	candidate: &Candidate<'_>,
629	matches: &mut Vec<usize>,
630	) {
631	if let Some(hits: &Vec) = self.0.get(candidate.path.as_bytes()) {
632	matches.extend(iter:hits);
633	}
634	}
635	}
636
637	#[derive(Clone, Debug)]
638	struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
639
640	impl BasenameLiteralStrategy {
641	fn new() -> BasenameLiteralStrategy {
642	BasenameLiteralStrategy(fnv::HashMap::default())
643	}
644
645	fn add(&mut self, global_index: usize, lit: String) {
646	self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
647	}
648
649	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
650	if candidate.basename.is_empty() {
651	return `false`;
652	}
653	self.0.contains_key(candidate.basename.as_bytes())
654	}
655
656	#[inline(never)]
657	fn matches_into(
658	&self,
659	candidate: &Candidate<'_>,
660	matches: &mut Vec<usize>,
661	) {
662	if candidate.basename.is_empty() {
663	return;
664	}
665	if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
666	matches.extend(hits);
667	}
668	}
669	}
670
671	#[derive(Clone, Debug)]
672	struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
673
674	impl ExtensionStrategy {
675	fn new() -> ExtensionStrategy {
676	ExtensionStrategy(fnv::HashMap::default())
677	}
678
679	fn add(&mut self, global_index: usize, ext: String) {
680	self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
681	}
682
683	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
684	if candidate.ext.is_empty() {
685	return `false`;
686	}
687	self.0.contains_key(candidate.ext.as_bytes())
688	}
689
690	#[inline(never)]
691	fn matches_into(
692	&self,
693	candidate: &Candidate<'_>,
694	matches: &mut Vec<usize>,
695	) {
696	if candidate.ext.is_empty() {
697	return;
698	}
699	if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
700	matches.extend(hits);
701	}
702	}
703	}
704
705	#[derive(Clone, Debug)]
706	struct PrefixStrategy {
707	matcher: AhoCorasick,
708	map: Vec<usize>,
709	longest: usize,
710	}
711
712	impl PrefixStrategy {
713	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
714	let path: &[u8] = candidate.path_prefix(self.longest);
715	for m: Match in self.matcher.find_overlapping_iter(input:path) {
716	if m.start() == `0` {
717	return `true`;
718	}
719	}
720	`false`
721	}
722
723	fn matches_into(
724	&self,
725	candidate: &Candidate<'_>,
726	matches: &mut Vec<usize>,
727	) {
728	let path: &[u8] = candidate.path_prefix(self.longest);
729	for m: Match in self.matcher.find_overlapping_iter(input:path) {
730	if m.start() == `0` {
731	matches.push(self.map[m.pattern()]);
732	}
733	}
734	}
735	}
736
737	#[derive(Clone, Debug)]
738	struct SuffixStrategy {
739	matcher: AhoCorasick,
740	map: Vec<usize>,
741	longest: usize,
742	}
743
744	impl SuffixStrategy {
745	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
746	let path: &[u8] = candidate.path_suffix(self.longest);
747	for m: Match in self.matcher.find_overlapping_iter(input:path) {
748	if m.end() == path.len() {
749	return `true`;
750	}
751	}
752	`false`
753	}
754
755	fn matches_into(
756	&self,
757	candidate: &Candidate<'_>,
758	matches: &mut Vec<usize>,
759	) {
760	let path: &[u8] = candidate.path_suffix(self.longest);
761	for m: Match in self.matcher.find_overlapping_iter(input:path) {
762	if m.end() == path.len() {
763	matches.push(self.map[m.pattern()]);
764	}
765	}
766	}
767	}
768
769	#[derive(Clone, Debug)]
770	struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>);
771
772	impl RequiredExtensionStrategy {
773	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
774	if candidate.ext.is_empty() {
775	return `false`;
776	}
777	match self.0.get(candidate.ext.as_bytes()) {
778	None => `false`,
779	Some(regexes) => {
780	for &(_, ref re) in regexes {
781	if re.is_match(candidate.path.as_bytes()) {
782	return `true`;
783	}
784	}
785	`false`
786	}
787	}
788	}
789
790	#[inline(never)]
791	fn matches_into(
792	&self,
793	candidate: &Candidate<'_>,
794	matches: &mut Vec<usize>,
795	) {
796	if candidate.ext.is_empty() {
797	return;
798	}
799	if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
800	for &(global_index, ref re) in regexes {
801	if re.is_match(candidate.path.as_bytes()) {
802	matches.push(global_index);
803	}
804	}
805	}
806	}
807	}
808
809	#[derive(Clone, Debug)]
810	struct RegexSetStrategy {
811	matcher: Regex,
812	map: Vec<usize>,
813	// We use a pool of PatternSets to hopefully allocating a fresh one on each
814	// call.
815	//
816	// TODO: In the next semver breaking release, we should drop this pool and
817	// expose an opaque type that wraps PatternSet. Then callers can provide
818	// it to `matches_into` directly. Callers might still want to use a pool
819	// or similar to amortize allocation, but that matches the status quo and
820	// absolves us of needing to do it here.
821	patset: Arc<Pool<PatternSet, PatternSetPoolFn>>,
822	}
823
824	type PatternSetPoolFn =
825	Box<dyn Fn() -> PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>;
826
827	impl RegexSetStrategy {
828	fn is_match(&self, candidate: &Candidate<'_>) -> bool {
829	self.matcher.is_match(input:candidate.path.as_bytes())
830	}
831
832	fn matches_into(
833	&self,
834	candidate: &Candidate<'_>,
835	matches: &mut Vec<usize>,
836	) {
837	let input: Input<'_> = regex_automata::Input::new(haystack:candidate.path.as_bytes());
838	let mut patset: PoolGuard<'_, PatternSet, …> = self.patset.get();
839	patset.clear();
840	self.matcher.which_overlapping_matches(&input, &mut patset);
841	for i: PatternID in patset.iter() {
842	matches.push(self.map[i]);
843	}
844	PoolGuard::put(this:patset);
845	}
846	}
847
848	#[derive(Clone, Debug)]
849	struct MultiStrategyBuilder {
850	literals: Vec<String>,
851	map: Vec<usize>,
852	longest: usize,
853	}
854
855	impl MultiStrategyBuilder {
856	fn new() -> MultiStrategyBuilder {
857	MultiStrategyBuilder { literals: vec![], map: vec![], longest: `0` }
858	}
859
860	fn add(&mut self, global_index: usize, literal: String) {
861	if literal.len() > self.longest {
862	self.longest = literal.len();
863	}
864	self.map.push(global_index);
865	self.literals.push(literal);
866	}
867
868	fn prefix(self) -> PrefixStrategy {
869	PrefixStrategy {
870	matcher: AhoCorasick::new(&self.literals).unwrap(),
871	map: self.map,
872	longest: self.longest,
873	}
874	}
875
876	fn suffix(self) -> SuffixStrategy {
877	SuffixStrategy {
878	matcher: AhoCorasick::new(&self.literals).unwrap(),
879	map: self.map,
880	longest: self.longest,
881	}
882	}
883
884	fn regex_set(self) -> Result<RegexSetStrategy, Error> {
885	let matcher = new_regex_set(self.literals)?;
886	let pattern_len = matcher.pattern_len();
887	let create: PatternSetPoolFn =
888	Box::new(move \|\| PatternSet::new(pattern_len));
889	Ok(RegexSetStrategy {
890	matcher,
891	map: self.map,
892	patset: Arc::new(Pool::new(create)),
893	})
894	}
895	}
896
897	#[derive(Clone, Debug)]
898	struct RequiredExtensionStrategyBuilder(
899	fnv::HashMap<Vec<u8>, Vec<(usize, String)>>,
900	);
901
902	impl RequiredExtensionStrategyBuilder {
903	fn new() -> RequiredExtensionStrategyBuilder {
904	RequiredExtensionStrategyBuilder(fnv::HashMap::default())
905	}
906
907	fn add(&mut self, global_index: usize, ext: String, regex: String) {
908	self.0
909	.entry(ext.into_bytes())
910	.or_insert(default:vec![])
911	.push((global_index, regex));
912	}
913
914	fn build(self) -> Result<RequiredExtensionStrategy, Error> {
915	let mut exts: HashMap, Vec<(usize, …)>, …> = fnv::HashMap::default();
916	for (ext: Vec, regexes: Vec<(usize, String)>) in self.0.into_iter() {
917	exts.insert(k:ext.clone(), v:vec![]);
918	for (global_index: usize, regex: String) in regexes {
919	let compiled: Regex = new_regex(&regex)?;
920	exts.get_mut(&ext).unwrap().push((global_index, compiled));
921	}
922	}
923	Ok(RequiredExtensionStrategy(exts))
924	}
925	}
926
/// Escape meta-characters within the given glob pattern.
///
/// The escaping works by surrounding meta-characters with brackets. For
/// example, `*` becomes `[*]`.
///
/// The characters escaped are `?`, `*`, `[`, `]`, `{` and `}`. The braces are
/// included because `{a,b}` is alternation syntax, so an unescaped brace in
/// the output would not be matched literally.
pub fn escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            // note that ! does not need escaping because it is only special
            // inside brackets
            '?' | '*' | '[' | ']' | '{' | '}' => {
                escaped.push('[');
                escaped.push(c);
                escaped.push(']');
            }
            c => {
                escaped.push(c);
            }
        }
    }
    escaped
}
949
#[cfg(test)]
mod tests {
    use crate::glob::Glob;

    use super::{GlobSet, GlobSetBuilder};

    // Exercises a set that mixes a recursive glob, an extension glob and a
    // literal glob.
    #[test]
    fn set_works() {
        let mut builder = GlobSetBuilder::new();
        builder.add(Glob::new("src/**/*.rs").unwrap());
        builder.add(Glob::new("*.c").unwrap());
        builder.add(Glob::new("src/lib.rs").unwrap());
        let set = builder.build().unwrap();

        assert!(set.is_match("foo.c"));
        assert!(set.is_match("src/foo.c"));
        assert!(!set.is_match("foo.rs"));
        assert!(!set.is_match("tests/foo.rs"));
        assert!(set.is_match("src/foo.rs"));
        assert!(set.is_match("src/grep/src/main.rs"));

        let matches = set.matches("src/lib.rs");
        assert_eq!(2, matches.len());
        assert_eq!(0, matches[0]);
        assert_eq!(2, matches[1]);
    }

    #[test]
    fn empty_set_works() {
        let set = GlobSetBuilder::new().build().unwrap();
        assert!(!set.is_match(""));
        assert!(!set.is_match("a"));
    }

    #[test]
    fn default_set_is_empty_works() {
        let set: GlobSet = Default::default();
        assert!(!set.is_match(""));
        assert!(!set.is_match("a"));
    }

    #[test]
    fn escape() {
        use super::escape;
        assert_eq!("foo", escape("foo"));
        assert_eq!("foo[*]", escape("foo*"));
        assert_eq!("[[][]]", escape("[]"));
        assert_eq!("[*][?]", escape("*?"));
        assert_eq!("src/[*][*]/[*].rs", escape("src/**/*.rs"));
        assert_eq!("bar[[]ab[]]baz", escape("bar[ab]baz"));
        assert_eq!("bar[[]!![]]!baz", escape("bar[!!]!baz"));
    }

    // This tests that regex matching doesn't "remember" the results of
    // previous searches. That is, if any memory is reused from a previous
    // search, then it should be cleared first.
    #[test]
    fn set_does_not_remember() {
        let mut builder = GlobSetBuilder::new();
        builder.add(Glob::new("*foo*").unwrap());
        builder.add(Glob::new("*bar*").unwrap());
        builder.add(Glob::new("*quux*").unwrap());
        let set = builder.build().unwrap();

        let matches = set.matches("ZfooZquuxZ");
        assert_eq!(2, matches.len());
        assert_eq!(0, matches[0]);
        assert_eq!(2, matches[1]);

        let matches = set.matches("nada");
        assert_eq!(0, matches.len());
    }
}
1023