glob.rs source code [crates/globset/src/glob.rs]

1	use std::fmt::Write;
2	use std::path::{is_separator, Path};
3
4	use regex_automata::meta::Regex;
5
6	use crate::{new_regex, Candidate, Error, ErrorKind};
7
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` must appear at the
    /// beginning of a file path or immediately following a `/`.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
48
49	impl MatchStrategy {
50	/// Returns a matching strategy for the given pattern.
51	pub(crate) fn new(pat: &Glob) -> MatchStrategy {
52	if let Some(lit: String) = pat.basename_literal() {
53	MatchStrategy::BasenameLiteral(lit)
54	} else if let Some(lit: String) = pat.literal() {
55	MatchStrategy::Literal(lit)
56	} else if let Some(ext: String) = pat.ext() {
57	MatchStrategy::Extension(ext)
58	} else if let Some(prefix: String) = pat.prefix() {
59	MatchStrategy::Prefix(prefix)
60	} else if let Some((suffix: String, component: bool)) = pat.suffix() {
61	MatchStrategy::Suffix { suffix, component }
62	} else if let Some(ext: String) = pat.required_ext() {
63	MatchStrategy::RequiredExtension(ext)
64	} else {
65	MatchStrategy::Regex
66	}
67	}
68	}
69
70	/// Glob represents a successfully parsed shell glob pattern.
71	///
72	/// It cannot be used directly to match file paths, but it can be converted
73	/// to a regular expression string or a matcher.
74	#[derive(Clone, Debug, Eq)]
75	pub struct Glob {
76	glob: String,
77	re: String,
78	opts: GlobOptions,
79	tokens: Tokens,
80	}
81
82	impl PartialEq for Glob {
83	fn eq(&self, other: &Glob) -> bool {
84	self.glob == other.glob && self.opts == other.opts
85	}
86	}
87
88	impl std::hash::Hash for Glob {
89	fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
90	self.glob.hash(state);
91	self.opts.hash(state);
92	}
93	}
94
95	impl std::fmt::Display for Glob {
96	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97	self.glob.fmt(f)
98	}
99	}
100
101	impl std::str::FromStr for Glob {
102	type Err = Error;
103
104	fn from_str(glob: &str) -> Result<Self, Self::Err> {
105	Self::new(glob)
106	}
107	}
108
109	/// A matcher for a single pattern.
110	#[derive(Clone, Debug)]
111	pub struct GlobMatcher {
112	/// The underlying pattern.
113	pat: Glob,
114	/// The pattern, as a compiled regex.
115	re: Regex,
116	}
117
118	impl GlobMatcher {
119	/// Tests whether the given path matches this pattern or not.
120	pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
121	self.is_match_candidate(&Candidate::new(path.as_ref()))
122	}
123
124	/// Tests whether the given path matches this pattern or not.
125	pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
126	self.re.is_match(&path.path)
127	}
128
129	/// Returns the `Glob` used to compile this matcher.
130	pub fn glob(&self) -> &Glob {
131	&self.pat
132	}
133	}
134
/// A strategic matcher for a single pattern.
///
/// Only compiled for tests.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The match strategy to use.
    strategy: MatchStrategy,
    /// The pattern, as a compiled regex.
    re: Regex,
}
144
#[cfg(test)]
impl GlobStrategic {
    /// Tests whether the given path matches this pattern or not.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        self.is_match_candidate(&Candidate::new(path.as_ref()))
    }

    /// Tests whether the given path matches this pattern or not.
    ///
    /// The check is dispatched on the precomputed strategy; the compiled
    /// regex is consulted only for the `RequiredExtension` and `Regex`
    /// strategies.
    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
        let byte_path = &*candidate.path;

        match self.strategy {
            MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
            MatchStrategy::BasenameLiteral(ref lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ref ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(ref pre) => {
                starts_with(pre.as_bytes(), byte_path)
            }
            MatchStrategy::Suffix { ref suffix, component } => {
                // A component suffix starts with `/`; the path also matches
                // when it equals the suffix with that leading `/` dropped.
                if component && byte_path == &suffix.as_bytes()[1..] {
                    return true;
                }
                ends_with(suffix.as_bytes(), byte_path)
            }
            MatchStrategy::RequiredExtension(ref ext) => {
                // The extension is necessary but not sufficient, so the
                // regex still has to confirm the match.
                let ext = ext.as_bytes();
                &*candidate.ext == ext && self.re.is_match(byte_path)
            }
            MatchStrategy::Regex => self.re.is_match(byte_path),
        }
    }
}
181
182	/// A builder for a pattern.
183	///
184	/// This builder enables configuring the match semantics of a pattern. For
185	/// example, one can make matching case insensitive.
186	///
187	/// The lifetime `'a` refers to the lifetime of the pattern string.
188	#[derive(Clone, Debug)]
189	pub struct GlobBuilder<'a> {
190	/// The glob pattern to compile.
191	glob: &'a str,
192	/// Options for the pattern.
193	opts: GlobOptions,
194	}
195
/// The set of options that control how a glob is parsed and matched.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
    /// Whether or not an empty case in an alternate will be removed.
    /// e.g., when enabled, `{,a}` will match "" and "a".
    empty_alternates: bool,
}
210
211	impl GlobOptions {
212	fn default() -> GlobOptions {
213	GlobOptions {
214	case_insensitive: `false`,
215	literal_separator: `false`,
216	backslash_escape: !is_separator('`\\`'),
217	empty_alternates: `false`,
218	}
219	}
220	}
221
222	#[derive(Clone, Debug, Default, Eq, PartialEq)]
223	struct Tokens(Vec<Token>);
224
225	impl std::ops::Deref for Tokens {
226	type Target = Vec<Token>;
227	fn deref(&self) -> &Vec<Token> {
228	&self.0
229	}
230	}
231
232	impl std::ops::DerefMut for Tokens {
233	fn deref_mut(&mut self) -> &mut Vec<Token> {
234	&mut self.0
235	}
236	}
237
238	#[derive(Clone, Debug, Eq, PartialEq)]
239	enum Token {
240	Literal(char),
241	Any,
242	ZeroOrMore,
243	RecursivePrefix,
244	RecursiveSuffix,
245	RecursiveZeroOrMore,
246	Class { negated: bool, ranges: Vec<(char, char)> },
247	Alternates(Vec<Tokens>),
248	}
249
250	impl Glob {
251	/// Builds a new pattern with default options.
252	pub fn new(glob: &str) -> Result<Glob, Error> {
253	GlobBuilder::new(glob).build()
254	}
255
256	/// Returns a matcher for this pattern.
257	pub fn compile_matcher(&self) -> GlobMatcher {
258	let re =
259	new_regex(&self.re).expect("regex compilation shouldn't fail");
260	GlobMatcher { pat: self.clone(), re }
261	}
262
263	/// Returns a strategic matcher.
264	///
265	/// This isn't exposed because it's not clear whether it's actually
266	/// faster than just running a regex for a single* pattern. If it*
267	/// is faster, then GlobMatcher should do it automatically.
268	#[cfg(test)]
269	fn compile_strategic_matcher(&self) -> GlobStrategic {
270	let strategy = MatchStrategy::new(self);
271	let re =
272	new_regex(&self.re).expect("regex compilation shouldn't fail");
273	GlobStrategic { strategy, re }
274	}
275
276	/// Returns the original glob pattern used to build this pattern.
277	pub fn glob(&self) -> &str {
278	&self.glob
279	}
280
281	/// Returns the regular expression string for this glob.
282	///
283	/// Note that regular expressions for globs are intended to be matched on
284	/// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
285	/// particular, globs are frequently used on file paths, where there is no
286	/// general guarantee that file paths are themselves valid UTF-8. As a
287	/// result, callers will need to ensure that they are using a regex API
288	/// that can match on arbitrary bytes. For example, the
289	/// [`regex`](https://crates.io/regex)
290	/// crate's
291	/// [`Regex`](https://docs.rs/regex//regex/struct.Regex.html)*
292	/// API is not suitable for this since it matches on `&str`, but its
293	/// [`bytes::Regex`](https://docs.rs/regex//regex/bytes/struct.Regex.html)*
294	/// API is suitable for this.
295	pub fn regex(&self) -> &str {
296	&self.re
297	}
298
299	/// Returns the pattern as a literal if and only if the pattern must match
300	/// an entire path exactly.
301	///
302	/// The basic format of these patterns is `{literal}`.
303	fn literal(&self) -> Option<String> {
304	if self.opts.case_insensitive {
305	return None;
306	}
307	let mut lit = String::new();
308	for t in &*self.tokens {
309	let Token::Literal(c) = t else* { return None };
310	lit.push(c);
311	}
312	if lit.is_empty() {
313	None
314	} else {
315	Some(lit)
316	}
317	}
318
319	/// Returns an extension if this pattern matches a file path if and only
320	/// if the file path has the extension returned.
321	///
322	/// Note that this extension returned differs from the extension that
323	/// std::path::Path::extension returns. Namely, this extension includes
324	/// the '.'. Also, paths like `.rs` are considered to have an extension
325	/// of `.rs`.
326	fn ext(&self) -> Option<String> {
327	if self.opts.case_insensitive {
328	return None;
329	}
330	let start = match *self.tokens.get(`0`)? {
331	Token::RecursivePrefix => `1`,
332	_ => `0`,
333	};
334	match *self.tokens.get(start)? {
335	Token::ZeroOrMore => {
336	// If there was no recursive prefix, then we only permit
337	// `` if `` can match a `/`. For example, if `` can't*
338	// match `/`, then `.c` doesn't match `foo/bar.c`.*
339	if start == `0` && self.opts.literal_separator {
340	return None;
341	}
342	}
343	_ => return None,
344	}
345	match *self.tokens.get(start + `1`)? {
346	Token::Literal('.') => {}
347	_ => return None,
348	}
349	let mut lit = ".".to_string();
350	for t in self.tokens[start + `2`..].iter() {
351	match *t {
352	Token::Literal('.') \| Token::Literal('/') => return None,
353	Token::Literal(c) => lit.push(c),
354	_ => return None,
355	}
356	}
357	if lit.is_empty() {
358	None
359	} else {
360	Some(lit)
361	}
362	}
363
364	/// This is like `ext`, but returns an extension even if it isn't sufficient
365	/// to imply a match. Namely, if an extension is returned, then it is
366	/// necessary but not sufficient for a match.
367	fn required_ext(&self) -> Option<String> {
368	if self.opts.case_insensitive {
369	return None;
370	}
371	// We don't care at all about the beginning of this pattern. All we
372	// need to check for is if it ends with a literal of the form `.ext`.
373	let mut ext: Vec<char> = vec![]; // built in reverse
374	for t in self.tokens.iter().rev() {
375	match *t {
376	Token::Literal('/') => return None,
377	Token::Literal(c) => {
378	ext.push(c);
379	if c == '.' {
380	break;
381	}
382	}
383	_ => return None,
384	}
385	}
386	if ext.last() != Some(&'.') {
387	None
388	} else {
389	ext.reverse();
390	Some(ext.into_iter().collect())
391	}
392	}
393
394	/// Returns a literal prefix of this pattern if the entire pattern matches
395	/// if the literal prefix matches.
396	fn prefix(&self) -> Option<String> {
397	if self.opts.case_insensitive {
398	return None;
399	}
400	let (end, need_sep) = match *self.tokens.last()? {
401	Token::ZeroOrMore => {
402	if self.opts.literal_separator {
403	// If a trailing `` can't match a `/`, then we can't*
404	// assume a match of the prefix corresponds to a match
405	// of the overall pattern. e.g., `foo/` with*
406	// `literal_separator` enabled matches `foo/bar` but not
407	// `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
408	// literal prefix.
409	return None;
410	}
411	(self.tokens.len() - `1`, `false`)
412	}
413	Token::RecursiveSuffix => (self.tokens.len() - `1`, `true`),
414	_ => (self.tokens.len(), `false`),
415	};
416	let mut lit = String::new();
417	for t in &self.tokens[`0`..end] {
418	let Token::Literal(c) = t else* { return None };
419	lit.push(c);
420	}
421	if need_sep {
422	lit.push('/');
423	}
424	if lit.is_empty() {
425	None
426	} else {
427	Some(lit)
428	}
429	}
430
431	/// Returns a literal suffix of this pattern if the entire pattern matches
432	/// if the literal suffix matches.
433	///
434	/// If a literal suffix is returned and it must match either the entire
435	/// file path or be preceded by a `/`, then also return true. This happens
436	/// with a pattern like `/foo/bar`. Namely, this pattern matches
437	/// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
438	/// suffix returned is `/foo/bar` (but should match the entire path
439	/// `foo/bar`).
440	///
441	/// When this returns true, the suffix literal is guaranteed to start with
442	/// a `/`.
443	fn suffix(&self) -> Option<(String, bool)> {
444	if self.opts.case_insensitive {
445	return None;
446	}
447	let mut lit = String::new();
448	let (start, entire) = match *self.tokens.get(`0`)? {
449	Token::RecursivePrefix => {
450	// We only care if this follows a path component if the next
451	// token is a literal.
452	if let Some(&Token::Literal(_)) = self.tokens.get(`1`) {
453	lit.push('/');
454	(`1`, `true`)
455	} else {
456	(`1`, `false`)
457	}
458	}
459	_ => (`0`, `false`),
460	};
461	let start = match *self.tokens.get(start)? {
462	Token::ZeroOrMore => {
463	// If literal_separator is enabled, then a `` can't*
464	// necessarily match everything, so reporting a suffix match
465	// as a match of the pattern would be a false positive.
466	if self.opts.literal_separator {
467	return None;
468	}
469	start + `1`
470	}
471	_ => start,
472	};
473	for t in &self.tokens[start..] {
474	let Token::Literal(c) = t else* { return None };
475	lit.push(c);
476	}
477	if lit.is_empty() \|\| lit == "/" {
478	None
479	} else {
480	Some((lit, entire))
481	}
482	}
483
484	/// If this pattern only needs to inspect the basename of a file path,
485	/// then the tokens corresponding to only the basename match are returned.
486	///
487	/// For example, given a pattern of `/.foo`, only the tokens*
488	/// corresponding to `.foo` are returned.*
489	///
490	/// Note that this will return None if any match of the basename tokens
491	/// doesn't correspond to a match of the entire pattern. For example, the
492	/// glob `foo` only matches when a file path has a basename of `foo`, but
493	/// doesn't always* match when a file path has a basename of `foo`. e.g.,*
494	/// `foo` doesn't match `abc/foo`.
495	fn basename_tokens(&self) -> Option<&[Token]> {
496	if self.opts.case_insensitive {
497	return None;
498	}
499	let start = match *self.tokens.get(`0`)? {
500	Token::RecursivePrefix => `1`,
501	_ => {
502	// With nothing to gobble up the parent portion of a path,
503	// we can't assume that matching on only the basename is
504	// correct.
505	return None;
506	}
507	};
508	if self.tokens[start..].is_empty() {
509	return None;
510	}
511	for t in self.tokens[start..].iter() {
512	match *t {
513	Token::Literal('/') => return None,
514	Token::Literal(_) => {} // OK
515	Token::Any \| Token::ZeroOrMore => {
516	if !self.opts.literal_separator {
517	// In this case, `` and `?` can match a path*
518	// separator, which means this could reach outside
519	// the basename.
520	return None;
521	}
522	}
523	Token::RecursivePrefix
524	\| Token::RecursiveSuffix
525	\| Token::RecursiveZeroOrMore => {
526	return None;
527	}
528	Token::Class { .. } \| Token::Alternates(..) => {
529	// We could* be a little smarter here, but either one*
530	// of these is going to prevent our literal optimizations
531	// anyway, so give up.
532	return None;
533	}
534	}
535	}
536	Some(&self.tokens[start..])
537	}
538
539	/// Returns the pattern as a literal if and only if the pattern exclusively
540	/// matches the basename of a file path and* is a literal.*
541	///
542	/// The basic format of these patterns is `/{literal}`, where `{literal}`
543	/// does not contain a path separator.
544	fn basename_literal(&self) -> Option<String> {
545	let tokens = self.basename_tokens()?;
546	let mut lit = String::new();
547	for t in tokens {
548	let Token::Literal(c) = t else* { return None };
549	lit.push(c);
550	}
551	Some(lit)
552	}
553	}
554
555	impl<'a> GlobBuilder<'a> {
556	/// Create a new builder for the pattern given.
557	///
558	/// The pattern is not compiled until `build` is called.
559	pub fn new(glob: &'a str) -> GlobBuilder<'a> {
560	GlobBuilder { glob, opts: GlobOptions::default() }
561	}
562
563	/// Parses and builds the pattern.
564	pub fn build(&self) -> Result<Glob, Error> {
565	let mut p = Parser {
566	glob: &self.glob,
567	stack: vec![Tokens::default()],
568	chars: self.glob.chars().peekable(),
569	prev: None,
570	cur: None,
571	opts: &self.opts,
572	};
573	p.parse()?;
574	if p.stack.is_empty() {
575	Err(Error {
576	glob: Some(self.glob.to_string()),
577	kind: ErrorKind::UnopenedAlternates,
578	})
579	} else if p.stack.len() > `1` {
580	Err(Error {
581	glob: Some(self.glob.to_string()),
582	kind: ErrorKind::UnclosedAlternates,
583	})
584	} else {
585	let tokens = p.stack.pop().unwrap();
586	Ok(Glob {
587	glob: self.glob.to_string(),
588	re: tokens.to_regex_with(&self.opts),
589	opts: self.opts,
590	tokens,
591	})
592	}
593	}
594
595	/// Toggle whether the pattern matches case insensitively or not.
596	///
597	/// This is disabled by default.
598	pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
599	self.opts.case_insensitive = yes;
600	self
601	}
602
603	/// Toggle whether a literal `/` is required to match a path separator.
604	///
605	/// By default this is false: `` and `?` will match `/`.*
606	pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
607	self.opts.literal_separator = yes;
608	self
609	}
610
611	/// When enabled, a back slash (`\`) may be used to escape
612	/// special characters in a glob pattern. Additionally, this will
613	/// prevent `\` from being interpreted as a path separator on all
614	/// platforms.
615	///
616	/// This is enabled by default on platforms where `\` is not a
617	/// path separator and disabled by default on platforms where `\`
618	/// is a path separator.
619	pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
620	self.opts.backslash_escape = yes;
621	self
622	}
623
624	/// Toggle whether an empty pattern in a list of alternates is accepted.
625	///
626	/// For example, if this is set then the glob `foo{,.txt}` will match both
627	/// `foo` and `foo.txt`.
628	///
629	/// By default this is false.
630	pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
631	self.opts.empty_alternates = yes;
632	self
633	}
634	}
635
636	impl Tokens {
637	/// Convert this pattern to a string that is guaranteed to be a valid
638	/// regular expression and will represent the matching semantics of this
639	/// glob pattern and the options given.
640	fn to_regex_with(&self, options: &GlobOptions) -> String {
641	let mut re = String::new();
642	re.push_str("(?-u)");
643	if options.case_insensitive {
644	re.push_str("(?i)");
645	}
646	re.push('^');
647	// Special case. If the entire glob is just ``, then it should match
648	// everything.
649	if self.len() == `1` && self[`0`] == Token::RecursivePrefix {
650	re.push_str(".*");
651	re.push('$');
652	return re;
653	}
654	self.tokens_to_regex(options, &self, &mut re);
655	re.push('$');
656	re
657	}
658
659	fn tokens_to_regex(
660	&self,
661	options: &GlobOptions,
662	tokens: &[Token],
663	re: &mut String,
664	) {
665	for tok in tokens.iter() {
666	match *tok {
667	Token::Literal(c) => {
668	re.push_str(&char_to_escaped_literal(c));
669	}
670	Token::Any => {
671	if options.literal_separator {
672	re.push_str("[^/]");
673	} else {
674	re.push_str(".");
675	}
676	}
677	Token::ZeroOrMore => {
678	if options.literal_separator {
679	re.push_str("[^/]*");
680	} else {
681	re.push_str(".*");
682	}
683	}
684	Token::RecursivePrefix => {
685	re.push_str("(?:/?\|.*/)");
686	}
687	Token::RecursiveSuffix => {
688	re.push_str("/.*");
689	}
690	Token::RecursiveZeroOrMore => {
691	re.push_str("(?:/\|/.*/)");
692	}
693	Token::Class { negated, ref ranges } => {
694	re.push('[');
695	if negated {
696	re.push('^');
697	}
698	for r in ranges {
699	if r.0 == r.1 {
700	// Not strictly necessary, but nicer to look at.
701	re.push_str(&char_to_escaped_literal(r.0));
702	} else {
703	re.push_str(&char_to_escaped_literal(r.0));
704	re.push('-');
705	re.push_str(&char_to_escaped_literal(r.1));
706	}
707	}
708	re.push(']');
709	}
710	Token::Alternates(ref patterns) => {
711	let mut parts = vec![];
712	for pat in patterns {
713	let mut altre = String::new();
714	self.tokens_to_regex(options, &pat, &mut altre);
715	if !altre.is_empty() \|\| options.empty_alternates {
716	parts.push(altre);
717	}
718	}
719
720	// It is possible to have an empty set in which case the
721	// resulting alternation '()' would be an error.
722	if !parts.is_empty() {
723	re.push_str("(?:");
724	re.push_str(&parts.join("\|"));
725	re.push(')');
726	}
727	}
728	}
729	}
730	}
731	}
732
733	/// Convert a Unicode scalar value to an escaped string suitable for use as
734	/// a literal in a non-Unicode regex.
735	fn char_to_escaped_literal(c: char) -> String {
736	let mut buf: [u8; 4] = [`0`; `4`];
737	let bytes: &[u8] = c.encode_utf8(&mut buf).as_bytes();
738	bytes_to_escaped_literal(bs:bytes)
739	}
740
741	/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
742	/// code units are converted to their escaped form.
743	fn bytes_to_escaped_literal(bs: &[u8]) -> String {
744	let mut s: String = String::with_capacity(bs.len());
745	for &b: u8 in bs {
746	if b <= `0x7F` {
747	regex_syntax::escape_into(
748	text:char::from(b).encode_utf8(&mut [`0`; `4`]),
749	&mut s,
750	);
751	} else {
752	write!(&mut s, "`\\`x{:`02`x}", b).unwrap();
753	}
754	}
755	s
756	}
757
758	struct Parser<'a> {
759	glob: &'a str,
760	stack: Vec<Tokens>,
761	chars: std::iter::Peekable<std::str::Chars<'a>>,
762	prev: Option<char>,
763	cur: Option<char>,
764	opts: &'a GlobOptions,
765	}
766
767	impl<'a> Parser<'a> {
768	fn error(&self, kind: ErrorKind) -> Error {
769	Error { glob: Some(self.glob.to_string()), kind }
770	}
771
772	fn parse(&mut self) -> Result<(), Error> {
773	while let Some(c) = self.bump() {
774	match c {
775	'?' => self.push_token(Token::Any)?,
776	'*' => self.parse_star()?,
777	'[' => self.parse_class()?,
778	'{' => self.push_alternate()?,
779	'}' => self.pop_alternate()?,
780	',' => self.parse_comma()?,
781	'`\\`' => self.parse_backslash()?,
782	c => self.push_token(Token::Literal(c))?,
783	}
784	}
785	Ok(())
786	}
787
788	fn push_alternate(&mut self) -> Result<(), Error> {
789	if self.stack.len() > `1` {
790	return Err(self.error(ErrorKind::NestedAlternates));
791	}
792	Ok(self.stack.push(Tokens::default()))
793	}
794
795	fn pop_alternate(&mut self) -> Result<(), Error> {
796	let mut alts = vec![];
797	while self.stack.len() >= `2` {
798	alts.push(self.stack.pop().unwrap());
799	}
800	self.push_token(Token::Alternates(alts))
801	}
802
803	fn push_token(&mut self, tok: Token) -> Result<(), Error> {
804	if let Some(ref mut pat) = self.stack.last_mut() {
805	return Ok(pat.push(tok));
806	}
807	Err(self.error(ErrorKind::UnopenedAlternates))
808	}
809
810	fn pop_token(&mut self) -> Result<Token, Error> {
811	if let Some(ref mut pat) = self.stack.last_mut() {
812	return Ok(pat.pop().unwrap());
813	}
814	Err(self.error(ErrorKind::UnopenedAlternates))
815	}
816
817	fn have_tokens(&self) -> Result<bool, Error> {
818	match self.stack.last() {
819	None => Err(self.error(ErrorKind::UnopenedAlternates)),
820	Some(ref pat) => Ok(!pat.is_empty()),
821	}
822	}
823
824	fn parse_comma(&mut self) -> Result<(), Error> {
825	// If we aren't inside a group alternation, then don't
826	// treat commas specially. Otherwise, we need to start
827	// a new alternate.
828	if self.stack.len() <= `1` {
829	self.push_token(Token::Literal(','))
830	} else {
831	Ok(self.stack.push(Tokens::default()))
832	}
833	}
834
835	fn parse_backslash(&mut self) -> Result<(), Error> {
836	if self.opts.backslash_escape {
837	match self.bump() {
838	None => Err(self.error(ErrorKind::DanglingEscape)),
839	Some(c) => self.push_token(Token::Literal(c)),
840	}
841	} else if is_separator('`\\`') {
842	// Normalize all patterns to use / as a separator.
843	self.push_token(Token::Literal('/'))
844	} else {
845	self.push_token(Token::Literal('`\\`'))
846	}
847	}
848
849	fn parse_star(&mut self) -> Result<(), Error> {
850	let prev = self.prev;
851	if self.peek() != Some('*') {
852	self.push_token(Token::ZeroOrMore)?;
853	return Ok(());
854	}
855	assert!(self.bump() == Some('*'));
856	if !self.have_tokens()? {
857	if !self.peek().map_or(`true`, is_separator) {
858	self.push_token(Token::ZeroOrMore)?;
859	self.push_token(Token::ZeroOrMore)?;
860	} else {
861	self.push_token(Token::RecursivePrefix)?;
862	assert!(self.bump().map_or(`true`, is_separator));
863	}
864	return Ok(());
865	}
866
867	if !prev.map(is_separator).unwrap_or(`false`) {
868	if self.stack.len() <= `1`
869	\|\| (prev != Some(',') && prev != Some('{'))
870	{
871	self.push_token(Token::ZeroOrMore)?;
872	self.push_token(Token::ZeroOrMore)?;
873	return Ok(());
874	}
875	}
876	let is_suffix = match self.peek() {
877	None => {
878	assert!(self.bump().is_none());
879	`true`
880	}
881	Some(',') \| Some('}') if self.stack.len() >= `2` => `true`,
882	Some(c) if is_separator(c) => {
883	assert!(self.bump().map(is_separator).unwrap_or(`false`));
884	`false`
885	}
886	_ => {
887	self.push_token(Token::ZeroOrMore)?;
888	self.push_token(Token::ZeroOrMore)?;
889	return Ok(());
890	}
891	};
892	match self.pop_token()? {
893	Token::RecursivePrefix => {
894	self.push_token(Token::RecursivePrefix)?;
895	}
896	Token::RecursiveSuffix => {
897	self.push_token(Token::RecursiveSuffix)?;
898	}
899	_ => {
900	if is_suffix {
901	self.push_token(Token::RecursiveSuffix)?;
902	} else {
903	self.push_token(Token::RecursiveZeroOrMore)?;
904	}
905	}
906	}
907	Ok(())
908	}
909
910	fn parse_class(&mut self) -> Result<(), Error> {
911	fn add_to_last_range(
912	glob: &str,
913	r: &mut (char, char),
914	add: char,
915	) -> Result<(), Error> {
916	r.1 = add;
917	if r.1 < r.0 {
918	Err(Error {
919	glob: Some(glob.to_string()),
920	kind: ErrorKind::InvalidRange(r.0, r.1),
921	})
922	} else {
923	Ok(())
924	}
925	}
926	let mut ranges = vec![];
927	let negated = match self.chars.peek() {
928	Some(&'!') \| Some(&'^') => {
929	let bump = self.bump();
930	assert!(bump == Some('!') \|\| bump == Some('^'));
931	`true`
932	}
933	_ => `false`,
934	};
935	let mut first = `true`;
936	let mut in_range = `false`;
937	loop {
938	let c = match self.bump() {
939	Some(c) => c,
940	// The only way to successfully break this loop is to observe
941	// a ']'.
942	None => return Err(self.error(ErrorKind::UnclosedClass)),
943	};
944	match c {
945	']' => {
946	if first {
947	ranges.push((']', ']'));
948	} else {
949	break;
950	}
951	}
952	'-' => {
953	if first {
954	ranges.push(('-', '-'));
955	} else if in_range {
956	// invariant: in_range is only set when there is
957	// already at least one character seen.
958	let r = ranges.last_mut().unwrap();
959	add_to_last_range(&self.glob, r, '-')?;
960	in_range = `false`;
961	} else {
962	assert!(!ranges.is_empty());
963	in_range = `true`;
964	}
965	}
966	c => {
967	if in_range {
968	// invariant: in_range is only set when there is
969	// already at least one character seen.
970	add_to_last_range(
971	&self.glob,
972	ranges.last_mut().unwrap(),
973	c,
974	)?;
975	} else {
976	ranges.push((c, c));
977	}
978	in_range = `false`;
979	}
980	}
981	first = `false`;
982	}
983	if in_range {
984	// Means that the last character in the class was a '-', so add
985	// it as a literal.
986	ranges.push(('-', '-'));
987	}
988	self.push_token(Token::Class { negated, ranges })
989	}
990
991	fn bump(&mut self) -> Option<char> {
992	self.prev = self.cur;
993	self.cur = self.chars.next();
994	self.cur
995	}
996
997	fn peek(&mut self) -> Option<char> {
998	self.chars.peek().map(\|&ch\| ch)
999	}
1000	}
1001
/// Returns true if and only if `haystack` begins with `needle`.
///
/// An empty `needle` is a prefix of every haystack.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
1006
/// Returns true if and only if `haystack` ends with `needle`.
///
/// An empty `needle` is a suffix of every haystack.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1014
1015	#[cfg(test)]
1016	mod tests {
1017	use super::Token::*;
1018	use super::{Glob, GlobBuilder, Token};
1019	use crate::{ErrorKind, GlobSetBuilder};
1020
/// Optional builder overrides used by the test macros. A field left as
/// `None` leaves the corresponding `GlobBuilder` setting untouched.
#[derive(Clone, Copy, Debug, Default)]
struct Options {
    /// Override for `GlobBuilder::case_insensitive`.
    casei: Option<bool>,
    /// Override for `GlobBuilder::literal_separator`.
    litsep: Option<bool>,
    /// Override for `GlobBuilder::backslash_escape`.
    bsesc: Option<bool>,
    /// Override for `GlobBuilder::empty_alternates`.
    ealtre: Option<bool>,
}
1028
/// Defines a test named `$name` asserting that `$pat` parses, with default
/// options, into exactly the token list `$tokens`.
macro_rules! syntax {
    ($name:ident, $pat:expr, $tokens:expr) => {
        #[test]
        fn $name() {
            let pat = Glob::new($pat).unwrap();
            assert_eq!($tokens, pat.tokens.0);
        }
    };
}
1038
/// Defines a test named `$name` asserting that parsing `$pat` with default
/// options fails with the error kind `$err`.
macro_rules! syntaxerr {
    ($name:ident, $pat:expr, $err:expr) => {
        #[test]
        fn $name() {
            let err = Glob::new($pat).unwrap_err();
            assert_eq!(&$err, err.kind());
        }
    };
}
1048
/// Defines a test named `$name` asserting that `$pat` translates to the
/// regex `$re`. The expected string is given without the leading `(?-u)`,
/// which the macro prepends. An optional `Options` value overrides builder
/// settings.
macro_rules! toregex {
    ($name:ident, $pat:expr, $re:expr) => {
        toregex!($name, $pat, $re, Options::default());
    };
    ($name:ident, $pat:expr, $re:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            if let Some(casei) = $options.casei {
                builder.case_insensitive(casei);
            }
            if let Some(litsep) = $options.litsep {
                builder.literal_separator(litsep);
            }
            if let Some(bsesc) = $options.bsesc {
                builder.backslash_escape(bsesc);
            }
            if let Some(ealtre) = $options.ealtre {
                builder.empty_alternates(ealtre);
            }
            let pat = builder.build().unwrap();
            assert_eq!(format!("(?-u){}", $re), pat.regex());
        }
    };
}
1074
/// Defines a test named `$name` asserting that `$pat` matches `$path`.
///
/// The match is checked three ways: with the regex matcher, with the
/// strategic matcher, and with a single-pattern `GlobSet`. An optional
/// `Options` value overrides builder settings.
macro_rules! matches {
    ($name:ident, $pat:expr, $path:expr) => {
        matches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            if let Some(casei) = $options.casei {
                builder.case_insensitive(casei);
            }
            if let Some(litsep) = $options.litsep {
                builder.literal_separator(litsep);
            }
            if let Some(bsesc) = $options.bsesc {
                builder.backslash_escape(bsesc);
            }
            if let Some(ealtre) = $options.ealtre {
                builder.empty_alternates(ealtre);
            }
            let pat = builder.build().unwrap();
            let matcher = pat.compile_matcher();
            let strategic = pat.compile_strategic_matcher();
            let set = GlobSetBuilder::new().add(pat).build().unwrap();
            assert!(matcher.is_match($path));
            assert!(strategic.is_match($path));
            assert!(set.is_match($path));
        }
    };
}
1105
/// Declares a `#[test]` named `$name` asserting that the glob `$pat` does NOT
/// match `$path` through any of the three code paths: the plain matcher, the
/// strategic matcher, and a single-pattern glob set.
///
/// The optional fourth argument is an `Options` value (or a reference to
/// one); only the fields that are `Some` are forwarded to the builder.
macro_rules! nmatches {
    ($name:ident, $pat:expr, $path:expr) => {
        nmatches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let opts = &$options;
            let mut gb = GlobBuilder::new($pat);
            if let Some(flag) = opts.casei {
                gb.case_insensitive(flag);
            }
            if let Some(flag) = opts.litsep {
                gb.literal_separator(flag);
            }
            if let Some(flag) = opts.bsesc {
                gb.backslash_escape(flag);
            }
            if let Some(flag) = opts.ealtre {
                gb.empty_alternates(flag);
            }
            let glob = gb.build().unwrap();
            // Both matchers are compiled before the glob is moved into the set.
            let plain = glob.compile_matcher();
            let strategic = glob.compile_strategic_matcher();
            let set = GlobSetBuilder::new().add(glob).build().unwrap();
            assert!(!plain.is_match($path));
            assert!(!strategic.is_match($path));
            assert!(!set.is_match($path));
        }
    };
}
1136
/// Copies `string` into a freshly allocated `String`; shorthand used when
/// writing expected values in the tests.
fn s(string: &str) -> String {
    String::from(string)
}
1140
1141	fn class(s: char, e: char) -> Token {
1142	Class { negated: `false`, ranges: vec![(s, e)] }
1143	}
1144
1145	fn classn(s: char, e: char) -> Token {
1146	Class { negated: `true`, ranges: vec![(s, e)] }
1147	}
1148
1149	fn rclass(ranges: &[(char, char)]) -> Token {
1150	Class { negated: `false`, ranges: ranges.to_vec() }
1151	}
1152
1153	fn rclassn(ranges: &[(char, char)]) -> Token {
1154	Class { negated: `true`, ranges: ranges.to_vec() }
1155	}
1156
1157	syntax!(literal1, "a", vec![Literal('a')]);
1158	syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
1159	syntax!(any1, "?", vec![Any]);
1160	syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
1161	syntax!(seq1, "*", vec![ZeroOrMore]);
1162	syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
1163	syntax!(
1164	seq3,
1165	"ab*",
1166	vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
1167	);
1168	syntax!(rseq1, "**", vec![RecursivePrefix]);
1169	syntax!(rseq2, "**/", vec![RecursivePrefix]);
1170	syntax!(rseq3, "/**", vec![RecursiveSuffix]);
1171	syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
1172	syntax!(
1173	rseq5,
1174	"a/**/b",
1175	vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
1176	);
1177	syntax!(cls1, "[a]", vec![class('a', 'a')]);
1178	syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
1179	syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
1180	syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
1181	syntax!(cls5, "[-]", vec![class('-', '-')]);
1182	syntax!(cls6, "[]]", vec![class(']', ']')]);
1183	syntax!(cls7, "[]", vec![class('', '*')]);
1184	syntax!(cls8, "[!!]", vec![classn('!', '!')]);
1185	syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
1186	syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
1187	syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
1188	syntax!(
1189	cls12,
1190	"[-a-z-]",
1191	vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
1192	);
1193	syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
1194	syntax!(cls14, "[--z]", vec![class('-', 'z')]);
1195	syntax!(cls15, "[ --]", vec![class(' ', '-')]);
1196	syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
1197	syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
1198	syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
1199	syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
1200	syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
1201	syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1202
1203	syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
1204	syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
1205	syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
1206	syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
1207	syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
1208	syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1209
1210	const CASEI: Options =
1211	Options { casei: Some(`true`), litsep: None, bsesc: None, ealtre: None };
1212	const SLASHLIT: Options =
1213	Options { casei: None, litsep: Some(`true`), bsesc: None, ealtre: None };
1214	const NOBSESC: Options = Options {
1215	casei: None,
1216	litsep: None,
1217	bsesc: Some(`false`),
1218	ealtre: None,
1219	};
1220	const BSESC: Options =
1221	Options { casei: None, litsep: None, bsesc: Some(`true`), ealtre: None };
1222	const EALTRE: Options = Options {
1223	casei: None,
1224	litsep: None,
1225	bsesc: Some(`true`),
1226	ealtre: Some(`true`),
1227	};
1228
1229	toregex!(re_casei, "a", "(?i)^a$", &CASEI);
1230
1231	toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
1232	toregex!(re_slash2, "", r"^[^/]$", SLASHLIT);
1233
1234	toregex!(re1, "a", "^a$");
1235	toregex!(re2, "?", "^.$");
1236	toregex!(re3, "", "^.$");
1237	toregex!(re4, "a?", "^a.$");
1238	toregex!(re5, "?a", "^.a$");
1239	toregex!(re6, "a", "^a.$");
1240	toregex!(re7, "a", "^.a$");
1241	toregex!(re8, "[]", r"^[\]$");
1242	toregex!(re9, "[+]", r"^[\+]$");
1243	toregex!(re10, "+", r"^\+$");
1244	toregex!(re11, "☃", r"^\xe2\x98\x83$");
1245	toregex!(re12, "*", r"^.$");
1246	toregex!(re13, "*/", r"^.$");
1247	toregex!(re14, "*/", r"^(?:/?\|./).$");
1248	toregex!(re15, "/", r"^.*$");
1249	toregex!(re16, "//", r"^(?:/?\|./).*$");
1250	toregex!(re17, "//*", r"^.$");
1251	toregex!(re18, "//*/", r"^(?:/?\|./).$");
1252	toregex!(re19, "a/*", r"^a/.$");
1253	toregex!(re20, "a//", r"^a/.*$");
1254	toregex!(re21, "a///*", r"^a/.$");
1255	toregex!(re22, "a/*/b", r"^a(?:/\|/./)b$");
1256	toregex!(re23, "a///b", r"^a(?:/\|/.*/)b$");
1257	toregex!(re24, "a///*/b", r"^a(?:/\|/./)b$");
1258	toregex!(re25, "*/b", r"^(?:/?\|./)b$");
1259	toregex!(re26, "//b", r"^(?:/?\|.*/)b$");
1260	toregex!(re27, "//*/b", r"^(?:/?\|./)b$");
1261	toregex!(re28, "a*", r"^a..*$");
1262	toregex!(re29, "*a", r"^..*a$");
1263	toregex!(re30, "a*b", r"^a..*b$");
1264	toregex!(re31, "**", r"^...$");
1265	toregex!(re32, "/a*", r"^/a..*$");
1266	toregex!(re33, "/*a", r"^/..*a$");
1267	toregex!(re34, "/a*b", r"^/a..*b$");
1268	toregex!(re35, "{a,b}", r"^(?:b\|a)$");
1269
1270	matches!(match1, "a", "a");
1271	matches!(match2, "a*b", "a_b");
1272	matches!(match3, "abc", "abc");
1273	matches!(match4, "abc", "a_b_c");
1274	matches!(match5, "abc", "a___b___c");
1275	matches!(match6, "abcabcabc", "abcabcabcabcabcabcabc");
1276	matches!(match7, "aaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1277	matches!(match8, "ab[xyz]cd", "abxcdbxcddd");
1278	matches!(match9, "*.rs", ".rs");
1279	matches!(match10, "☃", "☃");
1280
1281	matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
1282	matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
1283	matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
1284	matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
1285	matches!(matchrec5, "**", "abcde");
1286	matches!(matchrec6, "**", "");
1287	matches!(matchrec7, "**", ".asdf");
1288	matches!(matchrec8, "**", "/x/.asdf");
1289	matches!(matchrec9, "some///needle.txt", "some/needle.txt");
1290	matches!(matchrec10, "some///needle.txt", "some/one/needle.txt");
1291	matches!(matchrec11, "some///needle.txt", "some/one/two/needle.txt");
1292	matches!(matchrec12, "some///needle.txt", "some/other/needle.txt");
1293	matches!(matchrec13, "**/test", "one/two/test");
1294	matches!(matchrec14, "**/test", "one/test");
1295	matches!(matchrec15, "**/test", "test");
1296	matches!(matchrec16, "/**/test", "/one/two/test");
1297	matches!(matchrec17, "/**/test", "/one/test");
1298	matches!(matchrec18, "/**/test", "/test");
1299	matches!(matchrec19, "*/.", ".abc");
1300	matches!(matchrec20, "*/.", "abc/.abc");
1301	matches!(matchrec21, "**/foo/bar", "foo/bar");
1302	matches!(matchrec22, "./*", ".abc/abc");
1303	matches!(matchrec23, "test/**", "test/");
1304	matches!(matchrec24, "test/**", "test/one");
1305	matches!(matchrec25, "test/**", "test/one/two");
1306	matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
1307
1308	matches!(matchrange1, "a[0-9]b", "a0b");
1309	matches!(matchrange2, "a[0-9]b", "a9b");
1310	matches!(matchrange3, "a[!0-9]b", "a_b");
1311	matches!(matchrange4, "[a-z123]", "1");
1312	matches!(matchrange5, "[1a-z23]", "1");
1313	matches!(matchrange6, "[123a-z]", "1");
1314	matches!(matchrange7, "[abc-]", "-");
1315	matches!(matchrange8, "[-abc]", "-");
1316	matches!(matchrange9, "[-a-c]", "b");
1317	matches!(matchrange10, "[a-c-]", "b");
1318	matches!(matchrange11, "[-]", "-");
1319	matches!(matchrange12, "a[^0-9]b", "a_b");
1320
1321	matches!(matchpat1, "*hello.txt", "hello.txt");
1322	matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
1323	matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
1324	matches!(matchpat4, "*hello.txt", "some`\\`path`\\`to`\\`hello.txt");
1325	matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
1326	matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
1327	matches!(
1328	matchpat7,
1329	"*some/path/to/hello.txt",
1330	"a/bigger/some/path/to/hello.txt"
1331	);
1332
1333	matches!(matchescape, "_[[]_[]]_[?]_[]_!_", "_[_]_?__!_");
1334
1335	matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
1336	matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
1337	matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
1338	matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1339
1340	matches!(matchalt1, "a,b", "a,b");
1341	matches!(matchalt2, ",", ",");
1342	matches!(matchalt3, "{a,b}", "a");
1343	matches!(matchalt4, "{a,b}", "b");
1344	matches!(matchalt5, "{/src/,foo}", "abc/src/bar");
1345	matches!(matchalt6, "{/src/,foo}", "foo");
1346	matches!(matchalt7, "{[}],foo}", "}");
1347	matches!(matchalt8, "{foo}", "foo");
1348	matches!(matchalt9, "{}", "");
1349	matches!(matchalt10, "{,}", "");
1350	matches!(matchalt11, "{.foo,.bar,*.wat}", "test.foo");
1351	matches!(matchalt12, "{.foo,.bar,*.wat}", "test.bar");
1352	matches!(matchalt13, "{.foo,.bar,*.wat}", "test.wat");
1353	matches!(matchalt14, "foo{,.txt}", "foo.txt");
1354	nmatches!(matchalt15, "foo{,.txt}", "foo");
1355	matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
1356
1357	matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
1358	#[cfg(unix)]
1359	nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
1360	#[cfg(not(unix))]
1361	nmatches!(matchslash2, "abc?def", "abc`\\`def", SLASHLIT);
1362	nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
1363	matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
1364	#[cfg(unix)]
1365	nmatches!(matchslash5, "abc`\\`def", "abc/def", SLASHLIT);
1366	#[cfg(not(unix))]
1367	matches!(matchslash5, "abc`\\`def", "abc/def", SLASHLIT);
1368
1369	matches!(matchbackslash1, "`\\`[", "[", BSESC);
1370	matches!(matchbackslash2, "`\\`?", "?", BSESC);
1371	matches!(matchbackslash3, "`\\`", "", BSESC);
1372	matches!(matchbackslash4, "`\\`[a-z]", "`\\`a", NOBSESC);
1373	matches!(matchbackslash5, "`\\`?", "`\\`a", NOBSESC);
1374	matches!(matchbackslash6, "`\\`*", "`\\\\`", NOBSESC);
1375	#[cfg(unix)]
1376	matches!(matchbackslash7, "`\\`a", "a");
1377	#[cfg(not(unix))]
1378	matches!(matchbackslash8, "`\\`a", "/a");
1379
1380	nmatches!(matchnot1, "abc", "abcd");
1381	nmatches!(matchnot2, "abcabcabc", "abcabcabcabcabcabcabca");
1382	nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
1383	nmatches!(matchnot4, "some///needle.txt", "some/other/notthis.txt");
1384	nmatches!(matchnot5, "/**/test", "test");
1385	nmatches!(matchnot6, "/**/test", "/one/notthis");
1386	nmatches!(matchnot7, "/**/test", "/notthis");
1387	nmatches!(matchnot8, "*/.", "ab.c");
1388	nmatches!(matchnot9, "*/.", "abc/ab.c");
1389	nmatches!(matchnot10, "./*", "a.bc");
1390	nmatches!(matchnot11, "./*", "abc/a.bc");
1391	nmatches!(matchnot12, "a[0-9]b", "a_b");
1392	nmatches!(matchnot13, "a[!0-9]b", "a0b");
1393	nmatches!(matchnot14, "a[!0-9]b", "a9b");
1394	nmatches!(matchnot15, "[!-]", "-");
1395	nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
1396	nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
1397	nmatches!(
1398	matchnot18,
1399	"*some/path/to/hello.txt",
1400	"some/path/to/hello.txt-and-then-some"
1401	);
1402	nmatches!(
1403	matchnot19,
1404	"*some/path/to/hello.txt",
1405	"some/other/path/to/hello.txt"
1406	);
1407	nmatches!(matchnot20, "a", "foo/a");
1408	nmatches!(matchnot21, "./foo", "foo");
1409	nmatches!(matchnot22, "**/foo", "foofoo");
1410	nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
1411	nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
1412	nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
1413	nmatches!(
1414	matchnot26,
1415	"**/m4/ltoptions.m4",
1416	"csharp/src/packages/repositories.config",
1417	SLASHLIT
1418	);
1419	nmatches!(matchnot27, "a[^0-9]b", "a0b");
1420	nmatches!(matchnot28, "a[^0-9]b", "a9b");
1421	nmatches!(matchnot29, "[^-]", "-");
1422	nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
1423	nmatches!(
1424	matchrec31,
1425	"some/*/needle.txt",
1426	"some/one/two/needle.txt",
1427	SLASHLIT
1428	);
1429	nmatches!(
1430	matchrec32,
1431	"some/*/needle.txt",
1432	"some/one/two/three/needle.txt",
1433	SLASHLIT
1434	);
1435	nmatches!(matchrec33, "./*", ".abc");
1436	nmatches!(matchrec34, "foo/**", "foo");
1437
/// Declares a `#[test]` named `$name` that builds the glob `$pat` and checks
/// that calling the method `$which` on it yields `$expect`.
///
/// The optional fifth argument is an `Options` value (or a reference to
/// one); only the fields that are `Some` are forwarded to the builder.
macro_rules! extract {
    ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
        extract!($which, $name, $pat, $expect, Options::default());
    };
    ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
        #[test]
        fn $name() {
            let opts = &$options;
            let mut gb = GlobBuilder::new($pat);
            if let Some(flag) = opts.casei {
                gb.case_insensitive(flag);
            }
            if let Some(flag) = opts.litsep {
                gb.literal_separator(flag);
            }
            if let Some(flag) = opts.bsesc {
                gb.backslash_escape(flag);
            }
            if let Some(flag) = opts.ealtre {
                gb.empty_alternates(flag);
            }
            let glob = gb.build().unwrap();
            assert_eq!($expect, glob.$which());
        }
    };
}
1463
/// Shorthand for `extract!` that checks the built glob's `literal()` result;
/// every argument is forwarded unchanged.
macro_rules! literal {
    ($($tt:tt)*) => { extract!(literal, $($tt)*); }
}
1467
/// Shorthand for `extract!` that checks the built glob's `basename_tokens()`
/// result; every argument is forwarded unchanged.
macro_rules! basetokens {
    ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
}
1471
/// Shorthand for `extract!` that checks the built glob's `ext()` result;
/// every argument is forwarded unchanged.
macro_rules! ext {
    ($($tt:tt)*) => { extract!(ext, $($tt)*); }
}
1475
/// Shorthand for `extract!` that checks the built glob's `required_ext()`
/// result; every argument is forwarded unchanged.
macro_rules! required_ext {
    ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
}
1479
/// Shorthand for `extract!` that checks the built glob's `prefix()` result;
/// every argument is forwarded unchanged.
macro_rules! prefix {
    ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
}
1483
/// Shorthand for `extract!` that checks the built glob's `suffix()` result;
/// every argument is forwarded unchanged.
macro_rules! suffix {
    ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
}
1487
/// Shorthand for `extract!` that checks the built glob's `basename_literal()`
/// result; every argument is forwarded unchanged.
macro_rules! baseliteral {
    ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
}
1491
1492	literal!(extract_lit1, "foo", Some(s("foo")));
1493	literal!(extract_lit2, "foo", None, CASEI);
1494	literal!(extract_lit3, "/foo", Some(s("/foo")));
1495	literal!(extract_lit4, "/foo/", Some(s("/foo/")));
1496	literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
1497	literal!(extract_lit6, "*.foo", None);
1498	literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
1499	literal!(extract_lit8, "**/foo/bar", None);
1500
1501	basetokens!(
1502	extract_basetoks1,
1503	"**/foo",
1504	Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
1505	);
1506	basetokens!(extract_basetoks2, "**/foo", None, CASEI);
1507	basetokens!(
1508	extract_basetoks3,
1509	"**/foo",
1510	Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
1511	SLASHLIT
1512	);
1513	basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
1514	basetokens!(extract_basetoks5, "*foo", None);
1515	basetokens!(extract_basetoks6, "*/foo", None);
1516	basetokens!(
1517	extract_basetoks7,
1518	"*/foo",
1519	Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
1520	SLASHLIT
1521	);
1522
1523	ext!(extract_ext1, "*/.rs", Some(s(".rs")));
1524	ext!(extract_ext2, "*/.rs.bak", None);
1525	ext!(extract_ext3, "*.rs", Some(s(".rs")));
1526	ext!(extract_ext4, "a*.rs", None);
1527	ext!(extract_ext5, "/*.c", None);
1528	ext!(extract_ext6, "*.c", None, SLASHLIT);
1529	ext!(extract_ext7, "*.c", Some(s(".c")));
1530
1531	required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
1532	required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
1533	required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
1534	required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
1535	required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
1536	required_ext!(extract_req_ext6, "./rs", None);
1537	required_ext!(extract_req_ext7, "foo", None);
1538	required_ext!(extract_req_ext8, ".foo/", None);
1539	required_ext!(extract_req_ext9, "foo/", None);
1540
1541	prefix!(extract_prefix1, "/foo", Some(s("/foo")));
1542	prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
1543	prefix!(extract_prefix3, "**/foo", None);
1544	prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
1545
1546	suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), `true`)));
1547	suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), `false`)));
1548	suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
1549	suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), `false`)));
1550	suffix!(extract_suffix5, "*.foo", Some((s(".foo"), `false`)));
1551	suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
1552	suffix!(extract_suffix7, "*/_test", Some((s("_test"), `false`)));
1553
1554	baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
1555	baseliteral!(extract_baselit2, "foo", None);
1556	baseliteral!(extract_baselit3, "*foo", None);
1557	baseliteral!(extract_baselit4, "*/foo", None);
1558	}
1559