1/*!
2The types module provides a way of associating globs on file names to file
3types.
4
5This can be used to match specific types of files. For example, among
6the default file types provided, the Rust file type is defined to be `*.rs`
7with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
8name `c`.
9
10Note that the set of default types may change over time.
11
12# Example
13
14This shows how to create and use a simple file type matcher using the default
15file types defined in this crate.
16
17```
18use ignore::types::TypesBuilder;
19
20let mut builder = TypesBuilder::new();
21builder.add_defaults();
22builder.select("rust");
23let matcher = builder.build().unwrap();
24
25assert!(matcher.matched("foo.rs", false).is_whitelist());
26assert!(matcher.matched("foo.c", false).is_ignore());
27```
28
29# Example: negation
30
31This is like the previous example, but shows how negating a file type works.
32That is, this will let us match file paths that *don't* correspond to a
33particular file type.
34
35```
36use ignore::types::TypesBuilder;
37
38let mut builder = TypesBuilder::new();
39builder.add_defaults();
40builder.negate("c");
41let matcher = builder.build().unwrap();
42
43assert!(matcher.matched("foo.rs", false).is_none());
44assert!(matcher.matched("foo.c", false).is_ignore());
45```
46
47# Example: custom file type definitions
48
This shows how to extend this library's default file type definitions with
your own.
51
52```
53use ignore::types::TypesBuilder;
54
55let mut builder = TypesBuilder::new();
56builder.add_defaults();
57builder.add("foo", "*.foo");
58// Another way of adding a file type definition.
59// This is useful when accepting input from an end user.
60builder.add_def("bar:*.bar");
61// Note: we only select `foo`, not `bar`.
62builder.select("foo");
63let matcher = builder.build().unwrap();
64
65assert!(matcher.matched("x.foo", false).is_whitelist());
66// This is ignored because we only selected the `foo` file type.
67assert!(matcher.matched("x.bar", false).is_ignore());
68```
69
70We can also add file type definitions based on other definitions.
71
72```
73use ignore::types::TypesBuilder;
74
75let mut builder = TypesBuilder::new();
76builder.add_defaults();
77builder.add("foo", "*.foo");
78builder.add_def("bar:include:foo,cpp");
79builder.select("bar");
80let matcher = builder.build().unwrap();
81
82assert!(matcher.matched("x.foo", false).is_whitelist());
83assert!(matcher.matched("y.cpp", false).is_whitelist());
84```
85*/
86
87use std::cell::RefCell;
88use std::collections::HashMap;
89use std::path::Path;
90use std::sync::Arc;
91
92use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
93use regex::Regex;
94use thread_local::ThreadLocal;
95
96use crate::default_types::DEFAULT_TYPES;
97use crate::pathutil::file_name;
98use crate::{Error, Match};
99
100/// Glob represents a single glob in a set of file type definitions.
101///
102/// There may be more than one glob for a particular file type.
103///
104/// This is used to report information about the highest precedent glob
105/// that matched.
106///
107/// Note that not all matches necessarily correspond to a specific glob.
108/// For example, if there are one or more selections and a file path doesn't
109/// match any of those selections, then the file path is considered to be
110/// ignored.
111///
112/// The lifetime `'a` refers to the lifetime of the underlying file type
113/// definition, which corresponds to the lifetime of the file type matcher.
114#[derive(Clone, Debug)]
115pub struct Glob<'a>(GlobInner<'a>);
116
117#[derive(Clone, Debug)]
118enum GlobInner<'a> {
119 /// No glob matched, but the file path should still be ignored.
120 UnmatchedIgnore,
121 /// A glob matched.
122 Matched {
123 /// The file type definition which provided the glob.
124 def: &'a FileTypeDef,
125 },
126}
127
128impl<'a> Glob<'a> {
129 fn unmatched() -> Glob<'a> {
130 Glob(GlobInner::UnmatchedIgnore)
131 }
132
133 /// Return the file type definition that matched, if one exists. A file type
134 /// definition always exists when a specific definition matches a file
135 /// path.
136 pub fn file_type_def(&self) -> Option<&FileTypeDef> {
137 match self {
138 Glob(GlobInner::UnmatchedIgnore) => None,
139 Glob(GlobInner::Matched { def: &&FileTypeDef, .. }) => Some(def),
140 }
141 }
142}
143
/// A single file type definition: a name paired with the globs that
/// recognize files of that type.
///
/// File type definitions can be retrieved in aggregate from a file type
/// matcher. A definition is also reported when it is responsible for a
/// match.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef {
    /// The name of the file type.
    name: String,
    /// The globs that recognize files of this type.
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Return the globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        self.globs.as_slice()
    }
}
166
167/// Types is a file type matcher.
168#[derive(Clone, Debug)]
169pub struct Types {
170 /// All of the file type definitions, sorted lexicographically by name.
171 defs: Vec<FileTypeDef>,
172 /// All of the selections made by the user.
173 selections: Vec<Selection<FileTypeDef>>,
174 /// Whether there is at least one Selection::Select in our selections.
175 /// When this is true, a Match::None is converted to Match::Ignore.
176 has_selected: bool,
177 /// A mapping from glob index in the set to two indices. The first is an
178 /// index into `selections` and the second is an index into the
179 /// corresponding file type definition's list of globs.
180 glob_to_selection: Vec<(usize, usize)>,
181 /// The set of all glob selections, used for actual matching.
182 set: GlobSet,
183 /// Temporary storage for globs that match.
184 matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
185}
186
/// Indicates the type of a selection for a particular file type.
///
/// Both variants carry the name of the file type along with a payload of
/// type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    /// The named file type was selected.
    Select(String, T),
    /// The named file type was negated.
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true if and only if this selection is a negation.
    fn is_negated(&self) -> bool {
        match self {
            Selection::Negate(..) => true,
            Selection::Select(..) => false,
        }
    }

    /// Returns the file type name this selection refers to.
    fn name(&self) -> &str {
        match self {
            Selection::Select(label, _) | Selection::Negate(label, _) => {
                label.as_str()
            }
        }
    }

    /// Converts the payload with `f`, keeping the variant and the name.
    fn map<U, F>(self, f: F) -> Selection<U>
    where
        F: FnOnce(T) -> U,
    {
        match self {
            Selection::Select(label, payload) => {
                Selection::Select(label, f(payload))
            }
            Selection::Negate(label, payload) => {
                Selection::Negate(label, f(payload))
            }
        }
    }

    /// Borrows the payload carried by this selection.
    fn inner(&self) -> &T {
        match self {
            Selection::Select(_, payload) | Selection::Negate(_, payload) => {
                payload
            }
        }
    }
}
227
228impl Types {
229 /// Creates a new file type matcher that never matches any path and
230 /// contains no file type definitions.
231 pub fn empty() -> Types {
232 Types {
233 defs: vec![],
234 selections: vec![],
235 has_selected: false,
236 glob_to_selection: vec![],
237 set: GlobSetBuilder::new().build().unwrap(),
238 matches: Arc::new(ThreadLocal::default()),
239 }
240 }
241
242 /// Returns true if and only if this matcher has zero selections.
243 pub fn is_empty(&self) -> bool {
244 self.selections.is_empty()
245 }
246
247 /// Returns the number of selections used in this matcher.
248 pub fn len(&self) -> usize {
249 self.selections.len()
250 }
251
252 /// Return the set of current file type definitions.
253 ///
254 /// Definitions and globs are sorted.
255 pub fn definitions(&self) -> &[FileTypeDef] {
256 &self.defs
257 }
258
259 /// Returns a match for the given path against this file type matcher.
260 ///
261 /// The path is considered whitelisted if it matches a selected file type.
262 /// The path is considered ignored if it matches a negated file type.
263 /// If at least one file type is selected and `path` doesn't match, then
264 /// the path is also considered ignored.
265 pub fn matched<'a, P: AsRef<Path>>(
266 &'a self,
267 path: P,
268 is_dir: bool,
269 ) -> Match<Glob<'a>> {
270 // File types don't apply to directories, and we can't do anything
271 // if our glob set is empty.
272 if is_dir || self.set.is_empty() {
273 return Match::None;
274 }
275 // We only want to match against the file name, so extract it.
276 // If one doesn't exist, then we can't match it.
277 let name = match file_name(path.as_ref()) {
278 Some(name) => name,
279 None if self.has_selected => {
280 return Match::Ignore(Glob::unmatched());
281 }
282 None => {
283 return Match::None;
284 }
285 };
286 let mut matches = self.matches.get_or_default().borrow_mut();
287 self.set.matches_into(name, &mut *matches);
288 // The highest precedent match is the last one.
289 if let Some(&i) = matches.last() {
290 let (isel, _) = self.glob_to_selection[i];
291 let sel = &self.selections[isel];
292 let glob = Glob(GlobInner::Matched { def: sel.inner() });
293 return if sel.is_negated() {
294 Match::Ignore(glob)
295 } else {
296 Match::Whitelist(glob)
297 };
298 }
299 if self.has_selected {
300 Match::Ignore(Glob::unmatched())
301 } else {
302 Match::None
303 }
304 }
305}
306
307/// TypesBuilder builds a type matcher from a set of file type definitions and
308/// a set of file type selections.
309pub struct TypesBuilder {
310 types: HashMap<String, FileTypeDef>,
311 selections: Vec<Selection<()>>,
312}
313
314impl TypesBuilder {
315 /// Create a new builder for a file type matcher.
316 ///
317 /// The builder contains *no* type definitions to start with. A set
318 /// of default type definitions can be added with `add_defaults`, and
319 /// additional type definitions can be added with `select` and `negate`.
320 pub fn new() -> TypesBuilder {
321 TypesBuilder { types: HashMap::new(), selections: vec![] }
322 }
323
324 /// Build the current set of file type definitions *and* selections into
325 /// a file type matcher.
326 pub fn build(&self) -> Result<Types, Error> {
327 let defs = self.definitions();
328 let has_selected = self.selections.iter().any(|s| !s.is_negated());
329
330 let mut selections = vec![];
331 let mut glob_to_selection = vec![];
332 let mut build_set = GlobSetBuilder::new();
333 for (isel, selection) in self.selections.iter().enumerate() {
334 let def = match self.types.get(selection.name()) {
335 Some(def) => def.clone(),
336 None => {
337 let name = selection.name().to_string();
338 return Err(Error::UnrecognizedFileType(name));
339 }
340 };
341 for (iglob, glob) in def.globs.iter().enumerate() {
342 build_set.add(
343 GlobBuilder::new(glob)
344 .literal_separator(true)
345 .build()
346 .map_err(|err| Error::Glob {
347 glob: Some(glob.to_string()),
348 err: err.kind().to_string(),
349 })?,
350 );
351 glob_to_selection.push((isel, iglob));
352 }
353 selections.push(selection.clone().map(move |_| def));
354 }
355 let set = build_set
356 .build()
357 .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?;
358 Ok(Types {
359 defs: defs,
360 selections: selections,
361 has_selected: has_selected,
362 glob_to_selection: glob_to_selection,
363 set: set,
364 matches: Arc::new(ThreadLocal::default()),
365 })
366 }
367
368 /// Return the set of current file type definitions.
369 ///
370 /// Definitions and globs are sorted.
371 pub fn definitions(&self) -> Vec<FileTypeDef> {
372 let mut defs = vec![];
373 for def in self.types.values() {
374 let mut def = def.clone();
375 def.globs.sort();
376 defs.push(def);
377 }
378 defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
379 defs
380 }
381
382 /// Select the file type given by `name`.
383 ///
384 /// If `name` is `all`, then all file types currently defined are selected.
385 pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
386 if name == "all" {
387 for name in self.types.keys() {
388 self.selections.push(Selection::Select(name.to_string(), ()));
389 }
390 } else {
391 self.selections.push(Selection::Select(name.to_string(), ()));
392 }
393 self
394 }
395
396 /// Ignore the file type given by `name`.
397 ///
398 /// If `name` is `all`, then all file types currently defined are negated.
399 pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
400 if name == "all" {
401 for name in self.types.keys() {
402 self.selections.push(Selection::Negate(name.to_string(), ()));
403 }
404 } else {
405 self.selections.push(Selection::Negate(name.to_string(), ()));
406 }
407 self
408 }
409
410 /// Clear any file type definitions for the type name given.
411 pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
412 self.types.remove(name);
413 self
414 }
415
416 /// Add a new file type definition. `name` can be arbitrary and `pat`
417 /// should be a glob recognizing file paths belonging to the `name` type.
418 ///
419 /// If `name` is `all` or otherwise contains any character that is not a
420 /// Unicode letter or number, then an error is returned.
421 pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
422 lazy_static::lazy_static! {
423 static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
424 };
425 if name == "all" || !RE.is_match(name) {
426 return Err(Error::InvalidDefinition);
427 }
428 let (key, glob) = (name.to_string(), glob.to_string());
429 self.types
430 .entry(key)
431 .or_insert_with(|| FileTypeDef {
432 name: name.to_string(),
433 globs: vec![],
434 })
435 .globs
436 .push(glob);
437 Ok(())
438 }
439
440 /// Add a new file type definition specified in string form. There are two
441 /// valid formats:
442 /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the
443 /// given name with the given glob.
444 /// 2. `{name}:include:{comma-separated list of already defined names}.
445 /// This defines an 'include' definition that associates the given name
446 /// with the definitions of the given existing types.
447 /// Names may not include any characters that are not
448 /// Unicode letters or numbers.
449 pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
450 let parts: Vec<&str> = def.split(':').collect();
451 match parts.len() {
452 2 => {
453 let name = parts[0];
454 let glob = parts[1];
455 if name.is_empty() || glob.is_empty() {
456 return Err(Error::InvalidDefinition);
457 }
458 self.add(name, glob)
459 }
460 3 => {
461 let name = parts[0];
462 let types_string = parts[2];
463 if name.is_empty()
464 || parts[1] != "include"
465 || types_string.is_empty()
466 {
467 return Err(Error::InvalidDefinition);
468 }
469 let types = types_string.split(',');
470 // Check ahead of time to ensure that all types specified are
471 // present and fail fast if not.
472 if types.clone().any(|t| !self.types.contains_key(t)) {
473 return Err(Error::InvalidDefinition);
474 }
475 for type_name in types {
476 let globs =
477 self.types.get(type_name).unwrap().globs.clone();
478 for glob in globs {
479 self.add(name, &glob)?;
480 }
481 }
482 Ok(())
483 }
484 _ => Err(Error::InvalidDefinition),
485 }
486 }
487
488 /// Add a set of default file type definitions.
489 pub fn add_defaults(&mut self) -> &mut TypesBuilder {
490 static MSG: &'static str = "adding a default type should never fail";
491 for &(name, exts) in DEFAULT_TYPES {
492 for ext in exts {
493 self.add(name, ext).expect(MSG);
494 }
495 }
496 self
497 }
498}
499
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` function per invocation. The test adds the
    // given definitions, applies the selections and negations, builds the
    // matcher and checks whether the path is *not* ignored. A leading `not`
    // flips the expectation, i.e. the path must be ignored.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut builder = TypesBuilder::new();
                for definition in $types {
                    builder.add_def(definition).unwrap();
                }
                for selected in $sel {
                    builder.select(selected);
                }
                for negated in $selnot {
                    builder.negate(negated);
                }
                let matcher = builder.build().unwrap();
                let outcome = matcher.matched($path, false);
                assert_eq!($matched, !outcome.is_ignore());
            }
        };
    }

    // The file type definitions shared by every test in this module.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "foo:*.{rs,foo}",
            "combo:include:html,rust",
        ]
    }

    // Paths that must not be ignored.
    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
    matched!(match8, types(), vec!["combo"], vec![], "index.html");
    matched!(match9, types(), vec!["combo"], vec![], "lib.rs");

    // Paths that must be ignored.
    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
    matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");

    #[test]
    fn test_invalid_defs() {
        let mut builder = TypesBuilder::new();
        for definition in types() {
            builder.add_def(definition).unwrap();
        }
        // Snapshot the valid definitions so each rejected definition can be
        // shown to have left them untouched.
        let snapshot = builder.definitions();
        let rejected = vec![
            // Reference to type that does not exist
            "combo:include:html,python",
            // Bad format
            "combo:foobar:html,rust",
            "",
        ];
        for definition in rejected {
            assert!(builder.add_def(definition).is_err());
            // Ensure that nothing changed, even if some of the includes were
            // valid.
            assert_eq!(builder.definitions(), snapshot);
        }
    }
}
584