glob.rs source code [crates/globset-0.4.10/src/glob.rs]

1	use std::fmt;
2	use std::hash;
3	use std::iter;
4	use std::ops::{Deref, DerefMut};
5	use std::path::{is_separator, Path};
6	use std::str;
7
8	use regex;
9	use regex::bytes::Regex;
10
11	use crate::{new_regex, Candidate, Error, ErrorKind};
12
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` must appear at the
    /// beginning of a file path or immediately following a `/`.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
53
54	impl MatchStrategy {
55	/// Returns a matching strategy for the given pattern.
56	pub fn new(pat: &Glob) -> MatchStrategy {
57	if let Some(lit: String) = pat.basename_literal() {
58	MatchStrategy::BasenameLiteral(lit)
59	} else if let Some(lit: String) = pat.literal() {
60	MatchStrategy::Literal(lit)
61	} else if let Some(ext: String) = pat.ext() {
62	MatchStrategy::Extension(ext)
63	} else if let Some(prefix: String) = pat.prefix() {
64	MatchStrategy::Prefix(prefix)
65	} else if let Some((suffix: String, component: bool)) = pat.suffix() {
66	MatchStrategy::Suffix { suffix: suffix, component: component }
67	} else if let Some(ext: String) = pat.required_ext() {
68	MatchStrategy::RequiredExtension(ext)
69	} else {
70	MatchStrategy::Regex
71	}
72	}
73	}
74
/// Glob represents a successfully parsed shell glob pattern.
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
#[derive(Clone, Debug, Eq)]
pub struct Glob {
    /// The original glob pattern text, as given to the builder.
    glob: String,
    /// The regular expression string the glob was translated to.
    re: String,
    /// The options the glob was built with.
    opts: GlobOptions,
    /// The parsed token sequence of the glob.
    tokens: Tokens,
}
86
87	impl PartialEq for Glob {
88	fn eq(&self, other: &Glob) -> bool {
89	self.glob == other.glob && self.opts == other.opts
90	}
91	}
92
93	impl hash::Hash for Glob {
94	fn hash<H: hash::Hasher>(&self, state: &mut H) {
95	self.glob.hash(state);
96	self.opts.hash(state);
97	}
98	}
99
100	impl fmt::Display for Glob {
101	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102	self.glob.fmt(f)
103	}
104	}
105
106	impl str::FromStr for Glob {
107	type Err = Error;
108
109	fn from_str(glob: &str) -> Result<Self, Self::Err> {
110	Self::new(glob)
111	}
112	}
113
/// A matcher for a single pattern.
///
/// Matching is performed by running the pattern's compiled regex over the
/// bytes of the candidate path.
#[derive(Clone, Debug)]
pub struct GlobMatcher {
    /// The underlying pattern.
    pat: Glob,
    /// The pattern, as a compiled regex.
    re: Regex,
}
122
123	impl GlobMatcher {
124	/// Tests whether the given path matches this pattern or not.
125	pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
126	self.is_match_candidate(&Candidate::new(path.as_ref()))
127	}
128
129	/// Tests whether the given path matches this pattern or not.
130	pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
131	self.re.is_match(&path.path)
132	}
133
134	/// Returns the `Glob` used to compile this matcher.
135	pub fn glob(&self) -> &Glob {
136	&self.pat
137	}
138	}
139
/// A strategic matcher for a single pattern.
///
/// Only compiled for tests. It pairs the `MatchStrategy` selected for a
/// pattern with the pattern's regex, which is still needed for the
/// `RequiredExtension` and `Regex` strategies.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The match strategy to use.
    strategy: MatchStrategy,
    /// The pattern, as a compiled regex.
    re: Regex,
}
149
#[cfg(test)]
impl GlobStrategic {
    /// Tests whether the given path matches this pattern or not.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        self.is_match_candidate(&Candidate::new(path.as_ref()))
    }

    /// Tests whether the given candidate matches this pattern or not by
    /// applying the selected strategy instead of always running the regex.
    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
        let byte_path = &*candidate.path;

        match self.strategy {
            MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
            MatchStrategy::BasenameLiteral(ref lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ref ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(ref pre) => {
                starts_with(pre.as_bytes(), byte_path)
            }
            MatchStrategy::Suffix { ref suffix, component } => {
                // A component suffix starts with `/`. The path also matches
                // when it equals the suffix without that leading separator,
                // i.e. the suffix is the entire path.
                if component && byte_path == &suffix.as_bytes()[1..] {
                    return true;
                }
                ends_with(suffix.as_bytes(), byte_path)
            }
            MatchStrategy::RequiredExtension(ref ext) => {
                // The extension is only a necessary condition, so the regex
                // must still confirm the match.
                let ext = ext.as_bytes();
                &*candidate.ext == ext && self.re.is_match(byte_path)
            }
            MatchStrategy::Regex => self.re.is_match(byte_path),
        }
    }
}
186
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern.
    opts: GlobOptions,
}
200
/// The set of options that influence how a glob is parsed and translated.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
}

impl GlobOptions {
    /// Returns the default options: case sensitive matching, wildcards may
    /// match separators, and backslash escaping enabled exactly when `\` is
    /// not a path separator on the current platform.
    fn default() -> GlobOptions {
        GlobOptions {
            case_insensitive: false,
            literal_separator: false,
            backslash_escape: !is_separator('\\'),
        }
    }
}
222
/// A sequence of parsed glob tokens.
///
/// This is a thin wrapper around `Vec<Token>` that dereferences to the
/// underlying vector.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);

impl Deref for Tokens {
    type Target = Vec<Token>;

    /// Borrows the wrapped token vector.
    fn deref(&self) -> &Self::Target {
        let Tokens(inner) = self;
        inner
    }
}

impl DerefMut for Tokens {
    /// Mutably borrows the wrapped token vector.
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Tokens(inner) = self;
        inner
    }
}

/// A single element of a parsed glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A literal character.
    Literal(char),
    /// `?`: any single character.
    Any,
    /// `*`: zero or more characters.
    ZeroOrMore,
    /// A `**` at the start of a pattern (optionally followed by a separator).
    RecursivePrefix,
    /// A `/**` at the end of a pattern.
    RecursiveSuffix,
    /// A `/**/` in the middle of a pattern.
    RecursiveZeroOrMore,
    /// A character class such as `[a-z]`, possibly negated.
    Class { negated: bool, ranges: Vec<(char, char)> },
    /// A `{a,b}` group; each alternate is its own token sequence.
    Alternates(Vec<Tokens>),
}
250
251	impl Glob {
252	/// Builds a new pattern with default options.
253	pub fn new(glob: &str) -> Result<Glob, Error> {
254	GlobBuilder::new(glob).build()
255	}
256
257	/// Returns a matcher for this pattern.
258	pub fn compile_matcher(&self) -> GlobMatcher {
259	let re =
260	new_regex(&self.re).expect("regex compilation shouldn't fail");
261	GlobMatcher { pat: self.clone(), re: re }
262	}
263
264	/// Returns a strategic matcher.
265	///
266	/// This isn't exposed because it's not clear whether it's actually
267	/// faster than just running a regex for a single* pattern. If it*
268	/// is faster, then GlobMatcher should do it automatically.
269	#[cfg(test)]
270	fn compile_strategic_matcher(&self) -> GlobStrategic {
271	let strategy = MatchStrategy::new(self);
272	let re =
273	new_regex(&self.re).expect("regex compilation shouldn't fail");
274	GlobStrategic { strategy: strategy, re: re }
275	}
276
277	/// Returns the original glob pattern used to build this pattern.
278	pub fn glob(&self) -> &str {
279	&self.glob
280	}
281
282	/// Returns the regular expression string for this glob.
283	///
284	/// Note that regular expressions for globs are intended to be matched on
285	/// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
286	/// particular, globs are frequently used on file paths, where there is no
287	/// general guarantee that file paths are themselves valid UTF-8. As a
288	/// result, callers will need to ensure that they are using a regex API
289	/// that can match on arbitrary bytes. For example, the
290	/// [`regex`](https://crates.io/regex)
291	/// crate's
292	/// [`Regex`](https://docs.rs/regex//regex/struct.Regex.html)*
293	/// API is not suitable for this since it matches on `&str`, but its
294	/// [`bytes::Regex`](https://docs.rs/regex//regex/bytes/struct.Regex.html)*
295	/// API is suitable for this.
296	pub fn regex(&self) -> &str {
297	&self.re
298	}
299
300	/// Returns the pattern as a literal if and only if the pattern must match
301	/// an entire path exactly.
302	///
303	/// The basic format of these patterns is `{literal}`.
304	fn literal(&self) -> Option<String> {
305	if self.opts.case_insensitive {
306	return None;
307	}
308	let mut lit = String::new();
309	for t in &*self.tokens {
310	match *t {
311	Token::Literal(c) => lit.push(c),
312	_ => return None,
313	}
314	}
315	if lit.is_empty() {
316	None
317	} else {
318	Some(lit)
319	}
320	}
321
322	/// Returns an extension if this pattern matches a file path if and only
323	/// if the file path has the extension returned.
324	///
325	/// Note that this extension returned differs from the extension that
326	/// std::path::Path::extension returns. Namely, this extension includes
327	/// the '.'. Also, paths like `.rs` are considered to have an extension
328	/// of `.rs`.
329	fn ext(&self) -> Option<String> {
330	if self.opts.case_insensitive {
331	return None;
332	}
333	let start = match self.tokens.get(`0`) {
334	Some(&Token::RecursivePrefix) => `1`,
335	Some(_) => `0`,
336	_ => return None,
337	};
338	match self.tokens.get(start) {
339	Some(&Token::ZeroOrMore) => {
340	// If there was no recursive prefix, then we only permit
341	// `` if `` can match a `/`. For example, if `` can't*
342	// match `/`, then `.c` doesn't match `foo/bar.c`.*
343	if start == `0` && self.opts.literal_separator {
344	return None;
345	}
346	}
347	_ => return None,
348	}
349	match self.tokens.get(start + `1`) {
350	Some(&Token::Literal('.')) => {}
351	_ => return None,
352	}
353	let mut lit = ".".to_string();
354	for t in self.tokens[start + `2`..].iter() {
355	match *t {
356	Token::Literal('.') \| Token::Literal('/') => return None,
357	Token::Literal(c) => lit.push(c),
358	_ => return None,
359	}
360	}
361	if lit.is_empty() {
362	None
363	} else {
364	Some(lit)
365	}
366	}
367
368	/// This is like `ext`, but returns an extension even if it isn't sufficient
369	/// to imply a match. Namely, if an extension is returned, then it is
370	/// necessary but not sufficient for a match.
371	fn required_ext(&self) -> Option<String> {
372	if self.opts.case_insensitive {
373	return None;
374	}
375	// We don't care at all about the beginning of this pattern. All we
376	// need to check for is if it ends with a literal of the form `.ext`.
377	let mut ext: Vec<char> = vec![]; // built in reverse
378	for t in self.tokens.iter().rev() {
379	match *t {
380	Token::Literal('/') => return None,
381	Token::Literal(c) => {
382	ext.push(c);
383	if c == '.' {
384	break;
385	}
386	}
387	_ => return None,
388	}
389	}
390	if ext.last() != Some(&'.') {
391	None
392	} else {
393	ext.reverse();
394	Some(ext.into_iter().collect())
395	}
396	}
397
398	/// Returns a literal prefix of this pattern if the entire pattern matches
399	/// if the literal prefix matches.
400	fn prefix(&self) -> Option<String> {
401	if self.opts.case_insensitive {
402	return None;
403	}
404	let (end, need_sep) = match self.tokens.last() {
405	Some(&Token::ZeroOrMore) => {
406	if self.opts.literal_separator {
407	// If a trailing `` can't match a `/`, then we can't*
408	// assume a match of the prefix corresponds to a match
409	// of the overall pattern. e.g., `foo/` with*
410	// `literal_separator` enabled matches `foo/bar` but not
411	// `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
412	// literal prefix.
413	return None;
414	}
415	(self.tokens.len() - `1`, `false`)
416	}
417	Some(&Token::RecursiveSuffix) => (self.tokens.len() - `1`, `true`),
418	_ => (self.tokens.len(), `false`),
419	};
420	let mut lit = String::new();
421	for t in &self.tokens[`0`..end] {
422	match *t {
423	Token::Literal(c) => lit.push(c),
424	_ => return None,
425	}
426	}
427	if need_sep {
428	lit.push('/');
429	}
430	if lit.is_empty() {
431	None
432	} else {
433	Some(lit)
434	}
435	}
436
437	/// Returns a literal suffix of this pattern if the entire pattern matches
438	/// if the literal suffix matches.
439	///
440	/// If a literal suffix is returned and it must match either the entire
441	/// file path or be preceded by a `/`, then also return true. This happens
442	/// with a pattern like `/foo/bar`. Namely, this pattern matches
443	/// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
444	/// suffix returned is `/foo/bar` (but should match the entire path
445	/// `foo/bar`).
446	///
447	/// When this returns true, the suffix literal is guaranteed to start with
448	/// a `/`.
449	fn suffix(&self) -> Option<(String, bool)> {
450	if self.opts.case_insensitive {
451	return None;
452	}
453	let mut lit = String::new();
454	let (start, entire) = match self.tokens.get(`0`) {
455	Some(&Token::RecursivePrefix) => {
456	// We only care if this follows a path component if the next
457	// token is a literal.
458	if let Some(&Token::Literal(_)) = self.tokens.get(`1`) {
459	lit.push('/');
460	(`1`, `true`)
461	} else {
462	(`1`, `false`)
463	}
464	}
465	_ => (`0`, `false`),
466	};
467	let start = match self.tokens.get(start) {
468	Some(&Token::ZeroOrMore) => {
469	// If literal_separator is enabled, then a `` can't*
470	// necessarily match everything, so reporting a suffix match
471	// as a match of the pattern would be a false positive.
472	if self.opts.literal_separator {
473	return None;
474	}
475	start + `1`
476	}
477	_ => start,
478	};
479	for t in &self.tokens[start..] {
480	match *t {
481	Token::Literal(c) => lit.push(c),
482	_ => return None,
483	}
484	}
485	if lit.is_empty() \|\| lit == "/" {
486	None
487	} else {
488	Some((lit, entire))
489	}
490	}
491
492	/// If this pattern only needs to inspect the basename of a file path,
493	/// then the tokens corresponding to only the basename match are returned.
494	///
495	/// For example, given a pattern of `/.foo`, only the tokens*
496	/// corresponding to `.foo` are returned.*
497	///
498	/// Note that this will return None if any match of the basename tokens
499	/// doesn't correspond to a match of the entire pattern. For example, the
500	/// glob `foo` only matches when a file path has a basename of `foo`, but
501	/// doesn't always* match when a file path has a basename of `foo`. e.g.,*
502	/// `foo` doesn't match `abc/foo`.
503	fn basename_tokens(&self) -> Option<&[Token]> {
504	if self.opts.case_insensitive {
505	return None;
506	}
507	let start = match self.tokens.get(`0`) {
508	Some(&Token::RecursivePrefix) => `1`,
509	_ => {
510	// With nothing to gobble up the parent portion of a path,
511	// we can't assume that matching on only the basename is
512	// correct.
513	return None;
514	}
515	};
516	if self.tokens[start..].is_empty() {
517	return None;
518	}
519	for t in &self.tokens[start..] {
520	match *t {
521	Token::Literal('/') => return None,
522	Token::Literal(_) => {} // OK
523	Token::Any \| Token::ZeroOrMore => {
524	if !self.opts.literal_separator {
525	// In this case, `` and `?` can match a path*
526	// separator, which means this could reach outside
527	// the basename.
528	return None;
529	}
530	}
531	Token::RecursivePrefix
532	\| Token::RecursiveSuffix
533	\| Token::RecursiveZeroOrMore => {
534	return None;
535	}
536	Token::Class { .. } \| Token::Alternates(..) => {
537	// We could* be a little smarter here, but either one*
538	// of these is going to prevent our literal optimizations
539	// anyway, so give up.
540	return None;
541	}
542	}
543	}
544	Some(&self.tokens[start..])
545	}
546
547	/// Returns the pattern as a literal if and only if the pattern exclusively
548	/// matches the basename of a file path and* is a literal.*
549	///
550	/// The basic format of these patterns is `/{literal}`, where `{literal}`
551	/// does not contain a path separator.
552	fn basename_literal(&self) -> Option<String> {
553	let tokens = match self.basename_tokens() {
554	None => return None,
555	Some(tokens) => tokens,
556	};
557	let mut lit = String::new();
558	for t in tokens {
559	match *t {
560	Token::Literal(c) => lit.push(c),
561	_ => return None,
562	}
563	}
564	Some(lit)
565	}
566	}
567
568	impl<'a> GlobBuilder<'a> {
569	/// Create a new builder for the pattern given.
570	///
571	/// The pattern is not compiled until `build` is called.
572	pub fn new(glob: &'a str) -> GlobBuilder<'a> {
573	GlobBuilder { glob: glob, opts: GlobOptions::default() }
574	}
575
576	/// Parses and builds the pattern.
577	pub fn build(&self) -> Result<Glob, Error> {
578	let mut p = Parser {
579	glob: &self.glob,
580	stack: vec![Tokens::default()],
581	chars: self.glob.chars().peekable(),
582	prev: None,
583	cur: None,
584	opts: &self.opts,
585	};
586	p.parse()?;
587	if p.stack.is_empty() {
588	Err(Error {
589	glob: Some(self.glob.to_string()),
590	kind: ErrorKind::UnopenedAlternates,
591	})
592	} else if p.stack.len() > `1` {
593	Err(Error {
594	glob: Some(self.glob.to_string()),
595	kind: ErrorKind::UnclosedAlternates,
596	})
597	} else {
598	let tokens = p.stack.pop().unwrap();
599	Ok(Glob {
600	glob: self.glob.to_string(),
601	re: tokens.to_regex_with(&self.opts),
602	opts: self.opts,
603	tokens: tokens,
604	})
605	}
606	}
607
608	/// Toggle whether the pattern matches case insensitively or not.
609	///
610	/// This is disabled by default.
611	pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
612	self.opts.case_insensitive = yes;
613	self
614	}
615
616	/// Toggle whether a literal `/` is required to match a path separator.
617	///
618	/// By default this is false: `` and `?` will match `/`.*
619	pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
620	self.opts.literal_separator = yes;
621	self
622	}
623
624	/// When enabled, a back slash (`\`) may be used to escape
625	/// special characters in a glob pattern. Additionally, this will
626	/// prevent `\` from being interpreted as a path separator on all
627	/// platforms.
628	///
629	/// This is enabled by default on platforms where `\` is not a
630	/// path separator and disabled by default on platforms where `\`
631	/// is a path separator.
632	pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
633	self.opts.backslash_escape = yes;
634	self
635	}
636	}
637
638	impl Tokens {
639	/// Convert this pattern to a string that is guaranteed to be a valid
640	/// regular expression and will represent the matching semantics of this
641	/// glob pattern and the options given.
642	fn to_regex_with(&self, options: &GlobOptions) -> String {
643	let mut re = String::new();
644	re.push_str("(?-u)");
645	if options.case_insensitive {
646	re.push_str("(?i)");
647	}
648	re.push('^');
649	// Special case. If the entire glob is just ``, then it should match
650	// everything.
651	if self.len() == `1` && self[`0`] == Token::RecursivePrefix {
652	re.push_str(".*");
653	re.push('$');
654	return re;
655	}
656	self.tokens_to_regex(options, &self, &mut re);
657	re.push('$');
658	re
659	}
660
661	fn tokens_to_regex(
662	&self,
663	options: &GlobOptions,
664	tokens: &[Token],
665	re: &mut String,
666	) {
667	for tok in tokens {
668	match *tok {
669	Token::Literal(c) => {
670	re.push_str(&char_to_escaped_literal(c));
671	}
672	Token::Any => {
673	if options.literal_separator {
674	re.push_str("[^/]");
675	} else {
676	re.push_str(".");
677	}
678	}
679	Token::ZeroOrMore => {
680	if options.literal_separator {
681	re.push_str("[^/]*");
682	} else {
683	re.push_str(".*");
684	}
685	}
686	Token::RecursivePrefix => {
687	re.push_str("(?:/?\|.*/)");
688	}
689	Token::RecursiveSuffix => {
690	re.push_str("/.*");
691	}
692	Token::RecursiveZeroOrMore => {
693	re.push_str("(?:/\|/.*/)");
694	}
695	Token::Class { negated, ref ranges } => {
696	re.push('[');
697	if negated {
698	re.push('^');
699	}
700	for r in ranges {
701	if r.0 == r.1 {
702	// Not strictly necessary, but nicer to look at.
703	re.push_str(&char_to_escaped_literal(r.0));
704	} else {
705	re.push_str(&char_to_escaped_literal(r.0));
706	re.push('-');
707	re.push_str(&char_to_escaped_literal(r.1));
708	}
709	}
710	re.push(']');
711	}
712	Token::Alternates(ref patterns) => {
713	let mut parts = vec![];
714	for pat in patterns {
715	let mut altre = String::new();
716	self.tokens_to_regex(options, &pat, &mut altre);
717	if !altre.is_empty() {
718	parts.push(altre);
719	}
720	}
721
722	// It is possible to have an empty set in which case the
723	// resulting alternation '()' would be an error.
724	if !parts.is_empty() {
725	re.push('(');
726	re.push_str(&parts.join("\|"));
727	re.push(')');
728	}
729	}
730	}
731	}
732	}
733	}
734
735	/// Convert a Unicode scalar value to an escaped string suitable for use as
736	/// a literal in a non-Unicode regex.
737	fn char_to_escaped_literal(c: char) -> String {
738	bytes_to_escaped_literal(&c.to_string().into_bytes())
739	}
740
741	/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
742	/// code units are converted to their escaped form.
743	fn bytes_to_escaped_literal(bs: &[u8]) -> String {
744	let mut s: String = String::with_capacity(bs.len());
745	for &b: u8 in bs {
746	if b <= `0x7F` {
747	s.push_str(&regex::escape(&(b as char).to_string()));
748	} else {
749	s.push_str(&format!("`\\`x{:`02`x}", b));
750	}
751	}
752	s
753	}
754
/// The state of a single glob parse.
struct Parser<'a> {
    /// The pattern being parsed; copied into errors that are reported.
    glob: &'a str,
    /// A stack of token sequences. The bottom entry is the top-level
    /// pattern; `{` (and `,` inside a group) push a fresh sequence, and `}`
    /// pops everything above the bottom entry into one `Token::Alternates`.
    stack: Vec<Tokens>,
    /// The remaining characters of the pattern.
    chars: iter::Peekable<str::Chars<'a>>,
    /// The character consumed immediately before `cur`, if any.
    prev: Option<char>,
    /// The most recently consumed character, if any.
    cur: Option<char>,
    /// The options that control parsing (e.g. backslash escaping).
    opts: &'a GlobOptions,
}
763
764	impl<'a> Parser<'a> {
765	fn error(&self, kind: ErrorKind) -> Error {
766	Error { glob: Some(self.glob.to_string()), kind: kind }
767	}
768
769	fn parse(&mut self) -> Result<(), Error> {
770	while let Some(c) = self.bump() {
771	match c {
772	'?' => self.push_token(Token::Any)?,
773	'*' => self.parse_star()?,
774	'[' => self.parse_class()?,
775	'{' => self.push_alternate()?,
776	'}' => self.pop_alternate()?,
777	',' => self.parse_comma()?,
778	'`\\`' => self.parse_backslash()?,
779	c => self.push_token(Token::Literal(c))?,
780	}
781	}
782	Ok(())
783	}
784
785	fn push_alternate(&mut self) -> Result<(), Error> {
786	if self.stack.len() > `1` {
787	return Err(self.error(ErrorKind::NestedAlternates));
788	}
789	Ok(self.stack.push(Tokens::default()))
790	}
791
792	fn pop_alternate(&mut self) -> Result<(), Error> {
793	let mut alts = vec![];
794	while self.stack.len() >= `2` {
795	alts.push(self.stack.pop().unwrap());
796	}
797	self.push_token(Token::Alternates(alts))
798	}
799
800	fn push_token(&mut self, tok: Token) -> Result<(), Error> {
801	if let Some(ref mut pat) = self.stack.last_mut() {
802	return Ok(pat.push(tok));
803	}
804	Err(self.error(ErrorKind::UnopenedAlternates))
805	}
806
807	fn pop_token(&mut self) -> Result<Token, Error> {
808	if let Some(ref mut pat) = self.stack.last_mut() {
809	return Ok(pat.pop().unwrap());
810	}
811	Err(self.error(ErrorKind::UnopenedAlternates))
812	}
813
814	fn have_tokens(&self) -> Result<bool, Error> {
815	match self.stack.last() {
816	None => Err(self.error(ErrorKind::UnopenedAlternates)),
817	Some(ref pat) => Ok(!pat.is_empty()),
818	}
819	}
820
821	fn parse_comma(&mut self) -> Result<(), Error> {
822	// If we aren't inside a group alternation, then don't
823	// treat commas specially. Otherwise, we need to start
824	// a new alternate.
825	if self.stack.len() <= `1` {
826	self.push_token(Token::Literal(','))
827	} else {
828	Ok(self.stack.push(Tokens::default()))
829	}
830	}
831
832	fn parse_backslash(&mut self) -> Result<(), Error> {
833	if self.opts.backslash_escape {
834	match self.bump() {
835	None => Err(self.error(ErrorKind::DanglingEscape)),
836	Some(c) => self.push_token(Token::Literal(c)),
837	}
838	} else if is_separator('`\\`') {
839	// Normalize all patterns to use / as a separator.
840	self.push_token(Token::Literal('/'))
841	} else {
842	self.push_token(Token::Literal('`\\`'))
843	}
844	}
845
846	fn parse_star(&mut self) -> Result<(), Error> {
847	let prev = self.prev;
848	if self.peek() != Some('*') {
849	self.push_token(Token::ZeroOrMore)?;
850	return Ok(());
851	}
852	assert!(self.bump() == Some('*'));
853	if !self.have_tokens()? {
854	if !self.peek().map_or(`true`, is_separator) {
855	self.push_token(Token::ZeroOrMore)?;
856	self.push_token(Token::ZeroOrMore)?;
857	} else {
858	self.push_token(Token::RecursivePrefix)?;
859	assert!(self.bump().map_or(`true`, is_separator));
860	}
861	return Ok(());
862	}
863
864	if !prev.map(is_separator).unwrap_or(`false`) {
865	if self.stack.len() <= `1`
866	\|\| (prev != Some(',') && prev != Some('{'))
867	{
868	self.push_token(Token::ZeroOrMore)?;
869	self.push_token(Token::ZeroOrMore)?;
870	return Ok(());
871	}
872	}
873	let is_suffix = match self.peek() {
874	None => {
875	assert!(self.bump().is_none());
876	`true`
877	}
878	Some(',') \| Some('}') if self.stack.len() >= `2` => `true`,
879	Some(c) if is_separator(c) => {
880	assert!(self.bump().map(is_separator).unwrap_or(`false`));
881	`false`
882	}
883	_ => {
884	self.push_token(Token::ZeroOrMore)?;
885	self.push_token(Token::ZeroOrMore)?;
886	return Ok(());
887	}
888	};
889	match self.pop_token()? {
890	Token::RecursivePrefix => {
891	self.push_token(Token::RecursivePrefix)?;
892	}
893	Token::RecursiveSuffix => {
894	self.push_token(Token::RecursiveSuffix)?;
895	}
896	_ => {
897	if is_suffix {
898	self.push_token(Token::RecursiveSuffix)?;
899	} else {
900	self.push_token(Token::RecursiveZeroOrMore)?;
901	}
902	}
903	}
904	Ok(())
905	}
906
907	fn parse_class(&mut self) -> Result<(), Error> {
908	fn add_to_last_range(
909	glob: &str,
910	r: &mut (char, char),
911	add: char,
912	) -> Result<(), Error> {
913	r.1 = add;
914	if r.1 < r.0 {
915	Err(Error {
916	glob: Some(glob.to_string()),
917	kind: ErrorKind::InvalidRange(r.0, r.1),
918	})
919	} else {
920	Ok(())
921	}
922	}
923	let mut ranges = vec![];
924	let negated = match self.chars.peek() {
925	Some(&'!') \| Some(&'^') => {
926	let bump = self.bump();
927	assert!(bump == Some('!') \|\| bump == Some('^'));
928	`true`
929	}
930	_ => `false`,
931	};
932	let mut first = `true`;
933	let mut in_range = `false`;
934	loop {
935	let c = match self.bump() {
936	Some(c) => c,
937	// The only way to successfully break this loop is to observe
938	// a ']'.
939	None => return Err(self.error(ErrorKind::UnclosedClass)),
940	};
941	match c {
942	']' => {
943	if first {
944	ranges.push((']', ']'));
945	} else {
946	break;
947	}
948	}
949	'-' => {
950	if first {
951	ranges.push(('-', '-'));
952	} else if in_range {
953	// invariant: in_range is only set when there is
954	// already at least one character seen.
955	let r = ranges.last_mut().unwrap();
956	add_to_last_range(&self.glob, r, '-')?;
957	in_range = `false`;
958	} else {
959	assert!(!ranges.is_empty());
960	in_range = `true`;
961	}
962	}
963	c => {
964	if in_range {
965	// invariant: in_range is only set when there is
966	// already at least one character seen.
967	add_to_last_range(
968	&self.glob,
969	ranges.last_mut().unwrap(),
970	c,
971	)?;
972	} else {
973	ranges.push((c, c));
974	}
975	in_range = `false`;
976	}
977	}
978	first = `false`;
979	}
980	if in_range {
981	// Means that the last character in the class was a '-', so add
982	// it as a literal.
983	ranges.push(('-', '-'));
984	}
985	self.push_token(Token::Class { negated: negated, ranges: ranges })
986	}
987
988	fn bump(&mut self) -> Option<char> {
989	self.prev = self.cur;
990	self.cur = self.chars.next();
991	self.cur
992	}
993
994	fn peek(&mut self) -> Option<char> {
995	self.chars.peek().map(\|&ch\| ch)
996	}
997	}
998
/// Returns true if and only if `haystack` begins with `needle`.
///
/// An empty `needle` is a prefix of every haystack.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
1003
/// Returns true if and only if `haystack` ends with `needle`.
///
/// An empty `needle` is a suffix of every haystack; a `needle` longer than
/// `haystack` never matches.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1011
1012	#[cfg(test)]
1013	mod tests {
1014	use super::Token::*;
1015	use super::{Glob, GlobBuilder, Token};
1016	use crate::{ErrorKind, GlobSetBuilder};
1017
/// Optional builder overrides used by the test macros. A field left as
/// `None` keeps the builder's default for that setting.
#[derive(Clone, Copy, Debug, Default)]
struct Options {
    /// Override passed to `GlobBuilder::case_insensitive`.
    casei: Option<bool>,
    /// Override passed to `GlobBuilder::literal_separator`.
    litsep: Option<bool>,
    /// Override passed to `GlobBuilder::backslash_escape`.
    bsesc: Option<bool>,
}
1024
/// Defines a test named `$name` asserting that parsing `$pat` with default
/// options yields exactly the token list `$tokens`.
macro_rules! syntax {
    ($name:ident, $pat:expr, $tokens:expr) => {
        #[test]
        fn $name() {
            let pat = Glob::new($pat).unwrap();
            assert_eq!($tokens, pat.tokens.0);
        }
    };
}
1034
/// Defines a test named `$name` asserting that parsing `$pat` with default
/// options fails with the error kind `$err`.
macro_rules! syntaxerr {
    ($name:ident, $pat:expr, $err:expr) => {
        #[test]
        fn $name() {
            let err = Glob::new($pat).unwrap_err();
            assert_eq!(&$err, err.kind());
        }
    };
}
1044
/// Defines a test named `$name` asserting that `$pat` translates to the
/// regex `$re`. The expected regex is given without the leading `(?-u)`,
/// which the macro prepends. An optional `Options` value overrides the
/// builder defaults.
macro_rules! toregex {
    ($name:ident, $pat:expr, $re:expr) => {
        toregex!($name, $pat, $re, Options::default());
    };
    ($name:ident, $pat:expr, $re:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            // Only apply the overrides that were explicitly requested.
            if let Some(casei) = $options.casei {
                builder.case_insensitive(casei);
            }
            if let Some(litsep) = $options.litsep {
                builder.literal_separator(litsep);
            }
            if let Some(bsesc) = $options.bsesc {
                builder.backslash_escape(bsesc);
            }
            let pat = builder.build().unwrap();
            assert_eq!(format!("(?-u){}", $re), pat.regex());
        }
    };
}
1067
/// Defines a test named `$name` asserting that `$pat` matches `$path`.
///
/// The match is checked three ways, which must all agree: the regex
/// matcher, the strategic matcher, and a `GlobSet` containing only this
/// pattern. An optional `Options` value overrides the builder defaults.
macro_rules! matches {
    ($name:ident, $pat:expr, $path:expr) => {
        matches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            // Only apply the overrides that were explicitly requested.
            if let Some(casei) = $options.casei {
                builder.case_insensitive(casei);
            }
            if let Some(litsep) = $options.litsep {
                builder.literal_separator(litsep);
            }
            if let Some(bsesc) = $options.bsesc {
                builder.backslash_escape(bsesc);
            }
            let pat = builder.build().unwrap();
            let matcher = pat.compile_matcher();
            let strategic = pat.compile_strategic_matcher();
            let set = GlobSetBuilder::new().add(pat).build().unwrap();
            assert!(matcher.is_match($path));
            assert!(strategic.is_match($path));
            assert!(set.is_match($path));
        }
    };
}
1095
/// Defines a test named `$name` asserting that `$pat` does NOT match
/// `$path`.
///
/// The non-match is checked three ways, which must all agree: the regex
/// matcher, the strategic matcher, and a `GlobSet` containing only this
/// pattern. An optional `Options` value overrides the builder defaults.
macro_rules! nmatches {
    ($name:ident, $pat:expr, $path:expr) => {
        nmatches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            // Only apply the overrides that were explicitly requested.
            if let Some(casei) = $options.casei {
                builder.case_insensitive(casei);
            }
            if let Some(litsep) = $options.litsep {
                builder.literal_separator(litsep);
            }
            if let Some(bsesc) = $options.bsesc {
                builder.backslash_escape(bsesc);
            }
            let pat = builder.build().unwrap();
            let matcher = pat.compile_matcher();
            let strategic = pat.compile_strategic_matcher();
            let set = GlobSetBuilder::new().add(pat).build().unwrap();
            assert!(!matcher.is_match($path));
            assert!(!strategic.is_match($path));
            assert!(!set.is_match($path));
        }
    };
}
1123
/// Returns an owned `String` copy of `string`; a terse helper for writing
/// expected values in tests.
fn s(string: &str) -> String {
    String::from(string)
}
1127
1128	fn class(s: char, e: char) -> Token {
1129	Class { negated: `false`, ranges: vec![(s, e)] }
1130	}
1131
1132	fn classn(s: char, e: char) -> Token {
1133	Class { negated: `true`, ranges: vec![(s, e)] }
1134	}
1135
1136	fn rclass(ranges: &[(char, char)]) -> Token {
1137	Class { negated: `false`, ranges: ranges.to_vec() }
1138	}
1139
1140	fn rclassn(ranges: &[(char, char)]) -> Token {
1141	Class { negated: `true`, ranges: ranges.to_vec() }
1142	}
1143
1144	syntax!(literal1, "a", vec![Literal('a')]);
1145	syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
1146	syntax!(any1, "?", vec![Any]);
1147	syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
1148	syntax!(seq1, "*", vec![ZeroOrMore]);
1149	syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
1150	syntax!(
1151	seq3,
1152	"ab*",
1153	vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
1154	);
1155	syntax!(rseq1, "**", vec![RecursivePrefix]);
1156	syntax!(rseq2, "**/", vec![RecursivePrefix]);
1157	syntax!(rseq3, "/**", vec![RecursiveSuffix]);
1158	syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
1159	syntax!(
1160	rseq5,
1161	"a/**/b",
1162	vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
1163	);
1164	syntax!(cls1, "[a]", vec![class('a', 'a')]);
1165	syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
1166	syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
1167	syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
1168	syntax!(cls5, "[-]", vec![class('-', '-')]);
1169	syntax!(cls6, "[]]", vec![class(']', ']')]);
1170	syntax!(cls7, "[]", vec![class('', '*')]);
1171	syntax!(cls8, "[!!]", vec![classn('!', '!')]);
1172	syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
1173	syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
1174	syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
1175	syntax!(
1176	cls12,
1177	"[-a-z-]",
1178	vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
1179	);
1180	syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
1181	syntax!(cls14, "[--z]", vec![class('-', 'z')]);
1182	syntax!(cls15, "[ --]", vec![class(' ', '-')]);
1183	syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
1184	syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
1185	syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
1186	syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
1187	syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
1188	syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1189
1190	syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
1191	syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
1192	syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
1193	syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
1194	syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
1195	syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1196
1197	const CASEI: Options =
1198	Options { casei: Some(`true`), litsep: None, bsesc: None };
1199	const SLASHLIT: Options =
1200	Options { casei: None, litsep: Some(`true`), bsesc: None };
1201	const NOBSESC: Options =
1202	Options { casei: None, litsep: None, bsesc: Some(`false`) };
1203	const BSESC: Options =
1204	Options { casei: None, litsep: None, bsesc: Some(`true`) };
1205
1206	toregex!(re_casei, "a", "(?i)^a$", &CASEI);
1207
1208	toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
1209	toregex!(re_slash2, "", r"^[^/]$", SLASHLIT);
1210
1211	toregex!(re1, "a", "^a$");
1212	toregex!(re2, "?", "^.$");
1213	toregex!(re3, "", "^.$");
1214	toregex!(re4, "a?", "^a.$");
1215	toregex!(re5, "?a", "^.a$");
1216	toregex!(re6, "a", "^a.$");
1217	toregex!(re7, "a", "^.a$");
1218	toregex!(re8, "[]", r"^[\]$");
1219	toregex!(re9, "[+]", r"^[\+]$");
1220	toregex!(re10, "+", r"^\+$");
1221	toregex!(re11, "☃", r"^\xe2\x98\x83$");
1222	toregex!(re12, "*", r"^.$");
1223	toregex!(re13, "*/", r"^.$");
1224	toregex!(re14, "*/", r"^(?:/?\|./).$");
1225	toregex!(re15, "/", r"^.*$");
1226	toregex!(re16, "//", r"^(?:/?\|./).*$");
1227	toregex!(re17, "//*", r"^.$");
1228	toregex!(re18, "//*/", r"^(?:/?\|./).$");
1229	toregex!(re19, "a/*", r"^a/.$");
1230	toregex!(re20, "a//", r"^a/.*$");
1231	toregex!(re21, "a///*", r"^a/.$");
1232	toregex!(re22, "a/*/b", r"^a(?:/\|/./)b$");
1233	toregex!(re23, "a///b", r"^a(?:/\|/.*/)b$");
1234	toregex!(re24, "a///*/b", r"^a(?:/\|/./)b$");
1235	toregex!(re25, "*/b", r"^(?:/?\|./)b$");
1236	toregex!(re26, "//b", r"^(?:/?\|.*/)b$");
1237	toregex!(re27, "//*/b", r"^(?:/?\|./)b$");
1238	toregex!(re28, "a*", r"^a..*$");
1239	toregex!(re29, "*a", r"^..*a$");
1240	toregex!(re30, "a*b", r"^a..*b$");
1241	toregex!(re31, "**", r"^...$");
1242	toregex!(re32, "/a*", r"^/a..*$");
1243	toregex!(re33, "/*a", r"^/..*a$");
1244	toregex!(re34, "/a*b", r"^/a..*b$");
1245
1246	matches!(match1, "a", "a");
1247	matches!(match2, "a*b", "a_b");
1248	matches!(match3, "abc", "abc");
1249	matches!(match4, "abc", "a_b_c");
1250	matches!(match5, "abc", "a___b___c");
1251	matches!(match6, "abcabcabc", "abcabcabcabcabcabcabc");
1252	matches!(match7, "aaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1253	matches!(match8, "ab[xyz]cd", "abxcdbxcddd");
1254	matches!(match9, "*.rs", ".rs");
1255	matches!(match10, "☃", "☃");
1256
1257	matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
1258	matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
1259	matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
1260	matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
1261	matches!(matchrec5, "**", "abcde");
1262	matches!(matchrec6, "**", "");
1263	matches!(matchrec7, "**", ".asdf");
1264	matches!(matchrec8, "**", "/x/.asdf");
1265	matches!(matchrec9, "some///needle.txt", "some/needle.txt");
1266	matches!(matchrec10, "some///needle.txt", "some/one/needle.txt");
1267	matches!(matchrec11, "some///needle.txt", "some/one/two/needle.txt");
1268	matches!(matchrec12, "some///needle.txt", "some/other/needle.txt");
1269	matches!(matchrec13, "**/test", "one/two/test");
1270	matches!(matchrec14, "**/test", "one/test");
1271	matches!(matchrec15, "**/test", "test");
1272	matches!(matchrec16, "/**/test", "/one/two/test");
1273	matches!(matchrec17, "/**/test", "/one/test");
1274	matches!(matchrec18, "/**/test", "/test");
1275	matches!(matchrec19, "*/.", ".abc");
1276	matches!(matchrec20, "*/.", "abc/.abc");
1277	matches!(matchrec21, "**/foo/bar", "foo/bar");
1278	matches!(matchrec22, "./*", ".abc/abc");
1279	matches!(matchrec23, "test/**", "test/");
1280	matches!(matchrec24, "test/**", "test/one");
1281	matches!(matchrec25, "test/**", "test/one/two");
1282	matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
1283
1284	matches!(matchrange1, "a[0-9]b", "a0b");
1285	matches!(matchrange2, "a[0-9]b", "a9b");
1286	matches!(matchrange3, "a[!0-9]b", "a_b");
1287	matches!(matchrange4, "[a-z123]", "1");
1288	matches!(matchrange5, "[1a-z23]", "1");
1289	matches!(matchrange6, "[123a-z]", "1");
1290	matches!(matchrange7, "[abc-]", "-");
1291	matches!(matchrange8, "[-abc]", "-");
1292	matches!(matchrange9, "[-a-c]", "b");
1293	matches!(matchrange10, "[a-c-]", "b");
1294	matches!(matchrange11, "[-]", "-");
1295	matches!(matchrange12, "a[^0-9]b", "a_b");
1296
1297	matches!(matchpat1, "*hello.txt", "hello.txt");
1298	matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
1299	matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
1300	matches!(matchpat4, "*hello.txt", "some`\\`path`\\`to`\\`hello.txt");
1301	matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
1302	matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
1303	matches!(
1304	matchpat7,
1305	"*some/path/to/hello.txt",
1306	"a/bigger/some/path/to/hello.txt"
1307	);
1308
1309	matches!(matchescape, "_[[]_[]]_[?]_[]_!_", "_[_]_?__!_");
1310
1311	matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
1312	matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
1313	matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
1314	matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1315
1316	matches!(matchalt1, "a,b", "a,b");
1317	matches!(matchalt2, ",", ",");
1318	matches!(matchalt3, "{a,b}", "a");
1319	matches!(matchalt4, "{a,b}", "b");
1320	matches!(matchalt5, "{/src/,foo}", "abc/src/bar");
1321	matches!(matchalt6, "{/src/,foo}", "foo");
1322	matches!(matchalt7, "{[}],foo}", "}");
1323	matches!(matchalt8, "{foo}", "foo");
1324	matches!(matchalt9, "{}", "");
1325	matches!(matchalt10, "{,}", "");
1326	matches!(matchalt11, "{.foo,.bar,*.wat}", "test.foo");
1327	matches!(matchalt12, "{.foo,.bar,*.wat}", "test.bar");
1328	matches!(matchalt13, "{.foo,.bar,*.wat}", "test.wat");
1329
1330	matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
1331	#[cfg(unix)]
1332	nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
1333	#[cfg(not(unix))]
1334	nmatches!(matchslash2, "abc?def", "abc`\\`def", SLASHLIT);
1335	nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
1336	matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
1337	#[cfg(unix)]
1338	nmatches!(matchslash5, "abc`\\`def", "abc/def", SLASHLIT);
1339	#[cfg(not(unix))]
1340	matches!(matchslash5, "abc`\\`def", "abc/def", SLASHLIT);
1341
1342	matches!(matchbackslash1, "`\\`[", "[", BSESC);
1343	matches!(matchbackslash2, "`\\`?", "?", BSESC);
1344	matches!(matchbackslash3, "`\\`", "", BSESC);
1345	matches!(matchbackslash4, "`\\`[a-z]", "`\\`a", NOBSESC);
1346	matches!(matchbackslash5, "`\\`?", "`\\`a", NOBSESC);
1347	matches!(matchbackslash6, "`\\`*", "`\\\\`", NOBSESC);
1348	#[cfg(unix)]
1349	matches!(matchbackslash7, "`\\`a", "a");
1350	#[cfg(not(unix))]
1351	matches!(matchbackslash8, "`\\`a", "/a");
1352
1353	nmatches!(matchnot1, "abc", "abcd");
1354	nmatches!(matchnot2, "abcabcabc", "abcabcabcabcabcabcabca");
1355	nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
1356	nmatches!(matchnot4, "some///needle.txt", "some/other/notthis.txt");
1357	nmatches!(matchnot5, "/**/test", "test");
1358	nmatches!(matchnot6, "/**/test", "/one/notthis");
1359	nmatches!(matchnot7, "/**/test", "/notthis");
1360	nmatches!(matchnot8, "*/.", "ab.c");
1361	nmatches!(matchnot9, "*/.", "abc/ab.c");
1362	nmatches!(matchnot10, "./*", "a.bc");
1363	nmatches!(matchnot11, "./*", "abc/a.bc");
1364	nmatches!(matchnot12, "a[0-9]b", "a_b");
1365	nmatches!(matchnot13, "a[!0-9]b", "a0b");
1366	nmatches!(matchnot14, "a[!0-9]b", "a9b");
1367	nmatches!(matchnot15, "[!-]", "-");
1368	nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
1369	nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
1370	nmatches!(
1371	matchnot18,
1372	"*some/path/to/hello.txt",
1373	"some/path/to/hello.txt-and-then-some"
1374	);
1375	nmatches!(
1376	matchnot19,
1377	"*some/path/to/hello.txt",
1378	"some/other/path/to/hello.txt"
1379	);
1380	nmatches!(matchnot20, "a", "foo/a");
1381	nmatches!(matchnot21, "./foo", "foo");
1382	nmatches!(matchnot22, "**/foo", "foofoo");
1383	nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
1384	nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
1385	nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
1386	nmatches!(
1387	matchnot26,
1388	"**/m4/ltoptions.m4",
1389	"csharp/src/packages/repositories.config",
1390	SLASHLIT
1391	);
1392	nmatches!(matchnot27, "a[^0-9]b", "a0b");
1393	nmatches!(matchnot28, "a[^0-9]b", "a9b");
1394	nmatches!(matchnot29, "[^-]", "-");
1395	nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
1396	nmatches!(
1397	matchrec31,
1398	"some/*/needle.txt",
1399	"some/one/two/needle.txt",
1400	SLASHLIT
1401	);
1402	nmatches!(
1403	matchrec32,
1404	"some/*/needle.txt",
1405	"some/one/two/three/needle.txt",
1406	SLASHLIT
1407	);
1408	nmatches!(matchrec33, "./*", ".abc");
1409	nmatches!(matchrec34, "foo/**", "foo");
1410
/// Defines a `#[test]` named `$name` that builds the glob `$pat` and asserts
/// that calling the method `$which` on it returns `$expect`.
///
/// The four-argument form uses `Options::default()`. With an explicit
/// options value, every field that is `Some` (`casei`, `litsep`, `bsesc`) is
/// forwarded to the corresponding `GlobBuilder` setter before building.
macro_rules! extract {
    ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
        extract!($which, $name, $pat, $expect, Options::default());
    };
    ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
        #[test]
        fn $name() {
            // Evaluate the options expression once; only its fields are read.
            let opts = $options;
            let mut builder = GlobBuilder::new($pat);
            if let Some(enabled) = opts.casei {
                builder.case_insensitive(enabled);
            }
            if let Some(enabled) = opts.litsep {
                builder.literal_separator(enabled);
            }
            if let Some(enabled) = opts.bsesc {
                builder.backslash_escape(enabled);
            }
            let glob = builder.build().unwrap();
            assert_eq!($expect, glob.$which());
        }
    };
}
1433
/// Shorthand for `extract!(literal, ...)`: forwards every token it receives,
/// so the test checks the glob's `literal()` result.
macro_rules! literal {
    ($($tt:tt)*) => { extract!(literal, $($tt)*); }
}
1437
/// Shorthand for `extract!(basename_tokens, ...)`: forwards every token it
/// receives, so the test checks the glob's `basename_tokens()` result.
macro_rules! basetokens {
    ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
}
1441
/// Shorthand for `extract!(ext, ...)`: forwards every token it receives, so
/// the test checks the glob's `ext()` result.
macro_rules! ext {
    ($($tt:tt)*) => { extract!(ext, $($tt)*); }
}
1445
/// Shorthand for `extract!(required_ext, ...)`: forwards every token it
/// receives, so the test checks the glob's `required_ext()` result.
macro_rules! required_ext {
    ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
}
1449
/// Shorthand for `extract!(prefix, ...)`: forwards every token it receives,
/// so the test checks the glob's `prefix()` result.
macro_rules! prefix {
    ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
}
1453
/// Shorthand for `extract!(suffix, ...)`: forwards every token it receives,
/// so the test checks the glob's `suffix()` result.
macro_rules! suffix {
    ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
}
1457
/// Shorthand for `extract!(basename_literal, ...)`: forwards every token it
/// receives, so the test checks the glob's `basename_literal()` result.
macro_rules! baseliteral {
    ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
}
1461
1462	literal!(extract_lit1, "foo", Some(s("foo")));
1463	literal!(extract_lit2, "foo", None, CASEI);
1464	literal!(extract_lit3, "/foo", Some(s("/foo")));
1465	literal!(extract_lit4, "/foo/", Some(s("/foo/")));
1466	literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
1467	literal!(extract_lit6, "*.foo", None);
1468	literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
1469	literal!(extract_lit8, "**/foo/bar", None);
1470
1471	basetokens!(
1472	extract_basetoks1,
1473	"**/foo",
1474	Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
1475	);
1476	basetokens!(extract_basetoks2, "**/foo", None, CASEI);
1477	basetokens!(
1478	extract_basetoks3,
1479	"**/foo",
1480	Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
1481	SLASHLIT
1482	);
1483	basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
1484	basetokens!(extract_basetoks5, "*foo", None);
1485	basetokens!(extract_basetoks6, "*/foo", None);
1486	basetokens!(
1487	extract_basetoks7,
1488	"*/foo",
1489	Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
1490	SLASHLIT
1491	);
1492
1493	ext!(extract_ext1, "*/.rs", Some(s(".rs")));
1494	ext!(extract_ext2, "*/.rs.bak", None);
1495	ext!(extract_ext3, "*.rs", Some(s(".rs")));
1496	ext!(extract_ext4, "a*.rs", None);
1497	ext!(extract_ext5, "/*.c", None);
1498	ext!(extract_ext6, "*.c", None, SLASHLIT);
1499	ext!(extract_ext7, "*.c", Some(s(".c")));
1500
1501	required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
1502	required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
1503	required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
1504	required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
1505	required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
1506	required_ext!(extract_req_ext6, "./rs", None);
1507	required_ext!(extract_req_ext7, "foo", None);
1508	required_ext!(extract_req_ext8, ".foo/", None);
1509	required_ext!(extract_req_ext9, "foo/", None);
1510
1511	prefix!(extract_prefix1, "/foo", Some(s("/foo")));
1512	prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
1513	prefix!(extract_prefix3, "**/foo", None);
1514	prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
1515
1516	suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), `true`)));
1517	suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), `false`)));
1518	suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
1519	suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), `false`)));
1520	suffix!(extract_suffix5, "*.foo", Some((s(".foo"), `false`)));
1521	suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
1522	suffix!(extract_suffix7, "*/_test", Some((s("_test"), `false`)));
1523
1524	baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
1525	baseliteral!(extract_baselit2, "foo", None);
1526	baseliteral!(extract_baselit3, "*foo", None);
1527	baseliteral!(extract_baselit4, "*/foo", None);
1528	}
1529