1/*!
2The types module provides a way of associating globs on file names to file
3types.
4
5This can be used to match specific types of files. For example, among
6the default file types provided, the Rust file type is defined to be `*.rs`
7with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
8name `c`.
9
10Note that the set of default types may change over time.
11
12# Example
13
14This shows how to create and use a simple file type matcher using the default
15file types defined in this crate.
16
17```
18use ignore::types::TypesBuilder;
19
20let mut builder = TypesBuilder::new();
21builder.add_defaults();
22builder.select("rust");
23let matcher = builder.build().unwrap();
24
25assert!(matcher.matched("foo.rs", false).is_whitelist());
26assert!(matcher.matched("foo.c", false).is_ignore());
27```
28
29# Example: negation
30
31This is like the previous example, but shows how negating a file type works.
32That is, this will let us match file paths that *don't* correspond to a
33particular file type.
34
35```
36use ignore::types::TypesBuilder;
37
38let mut builder = TypesBuilder::new();
39builder.add_defaults();
40builder.negate("c");
41let matcher = builder.build().unwrap();
42
43assert!(matcher.matched("foo.rs", false).is_none());
44assert!(matcher.matched("foo.c", false).is_ignore());
45```
46
47# Example: custom file type definitions
48
This shows how to extend this library's default file type definitions with
your own.
51
52```
53use ignore::types::TypesBuilder;
54
55let mut builder = TypesBuilder::new();
56builder.add_defaults();
57builder.add("foo", "*.foo");
58// Another way of adding a file type definition.
59// This is useful when accepting input from an end user.
60builder.add_def("bar:*.bar");
61// Note: we only select `foo`, not `bar`.
62builder.select("foo");
63let matcher = builder.build().unwrap();
64
65assert!(matcher.matched("x.foo", false).is_whitelist());
66// This is ignored because we only selected the `foo` file type.
67assert!(matcher.matched("x.bar", false).is_ignore());
68```
69
70We can also add file type definitions based on other definitions.
71
72```
73use ignore::types::TypesBuilder;
74
75let mut builder = TypesBuilder::new();
76builder.add_defaults();
77builder.add("foo", "*.foo");
78builder.add_def("bar:include:foo,cpp");
79builder.select("bar");
80let matcher = builder.build().unwrap();
81
82assert!(matcher.matched("x.foo", false).is_whitelist());
83assert!(matcher.matched("y.cpp", false).is_whitelist());
84```
85*/
86
87use std::{collections::HashMap, path::Path, sync::Arc};
88
89use {
90 globset::{GlobBuilder, GlobSet, GlobSetBuilder},
91 regex_automata::util::pool::Pool,
92};
93
94use crate::{default_types::DEFAULT_TYPES, pathutil::file_name, Error, Match};
95
/// Glob represents a single glob in a set of file type definitions.
///
/// There may be more than one glob for a particular file type.
///
/// This is used to report information about the highest-precedence glob
/// that matched.
///
/// Note that not all matches necessarily correspond to a specific glob.
/// For example, if there are one or more selections and a file path doesn't
/// match any of those selections, then the file path is considered to be
/// ignored.
///
/// The lifetime `'a` refers to the lifetime of the underlying file type
/// definition, which corresponds to the lifetime of the file type matcher.
#[derive(Clone, Debug)]
pub struct Glob<'a>(GlobInner<'a>);

/// The private representation behind `Glob`.
#[derive(Clone, Debug)]
enum GlobInner<'a> {
    /// No glob matched, but the file path should still be ignored.
    UnmatchedIgnore,
    /// A glob matched.
    Matched {
        /// The file type definition which provided the glob.
        def: &'a FileTypeDef,
    },
}

impl<'a> Glob<'a> {
    /// Build the value reported when a path is ignored without any
    /// particular glob having matched it.
    fn unmatched() -> Glob<'a> {
        Glob(GlobInner::UnmatchedIgnore)
    }

    /// Return the file type definition that matched, if one exists. A file type
    /// definition always exists when a specific definition matches a file
    /// path.
    ///
    /// Returns `None` when the path was ignored without a specific glob
    /// matching it.
    pub fn file_type_def(&self) -> Option<&FileTypeDef> {
        match self {
            Glob(GlobInner::UnmatchedIgnore) => None,
            // Matching through `&self` binds `def` as `&&FileTypeDef`; it is
            // coerced to `&FileTypeDef` by the return type.
            Glob(GlobInner::Matched { def }) => Some(def),
        }
    }
}

/// A single file type definition.
///
/// File type definitions can be retrieved in aggregate from a file type
/// matcher. A file type definition is also reported when it is responsible
/// for a match.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef {
    /// The name of the file type, e.g. `rust`.
    name: String,
    /// The globs that recognize files of this type.
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Return the globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        &self.globs
    }
}
162
/// Types is a file type matcher.
///
/// It holds the known file type definitions together with the selections
/// (selected or negated types) that decide how a path is classified by
/// `matched`.
#[derive(Clone, Debug)]
pub struct Types {
    /// All of the file type definitions, sorted lexicographically by name.
    defs: Vec<FileTypeDef>,
    /// All of the selections made by the user, each paired with the file
    /// type definition it refers to.
    selections: Vec<Selection<FileTypeDef>>,
    /// Whether there is at least one Selection::Select in our selections.
    /// When this is true, a Match::None is converted to Match::Ignore.
    has_selected: bool,
    /// A mapping from glob index in the set to two indices. The first is an
    /// index into `selections` and the second is an index into the
    /// corresponding file type definition's list of globs.
    glob_to_selection: Vec<(usize, usize)>,
    /// The set of all glob selections, used for actual matching.
    set: GlobSet,
    /// Temporary storage for globs that match. The pool hands out reusable
    /// vectors that receive the indices of matching globs.
    matches: Arc<Pool<Vec<usize>>>,
}
182
/// Records whether a file type was selected or negated.
///
/// Both variants carry the file type's name plus a payload of type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    Select(String, T),
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true if and only if this selection is a negation.
    fn is_negated(&self) -> bool {
        matches!(self, Selection::Negate(..))
    }

    /// Returns the file type name this selection refers to.
    fn name(&self) -> &str {
        match self {
            Selection::Select(name, _) | Selection::Negate(name, _) => {
                name.as_str()
            }
        }
    }

    /// Transforms the payload with `f`, keeping the variant and the name.
    fn map<U, F>(self, f: F) -> Selection<U>
    where
        F: FnOnce(T) -> U,
    {
        match self {
            Selection::Select(name, payload) => {
                Selection::Select(name, f(payload))
            }
            Selection::Negate(name, payload) => {
                Selection::Negate(name, f(payload))
            }
        }
    }

    /// Borrows the payload carried by this selection.
    fn inner(&self) -> &T {
        match self {
            Selection::Select(_, payload) | Selection::Negate(_, payload) => {
                payload
            }
        }
    }
}
223
224impl Types {
225 /// Creates a new file type matcher that never matches any path and
226 /// contains no file type definitions.
227 pub fn empty() -> Types {
228 Types {
229 defs: vec![],
230 selections: vec![],
231 has_selected: false,
232 glob_to_selection: vec![],
233 set: GlobSetBuilder::new().build().unwrap(),
234 matches: Arc::new(Pool::new(|| vec![])),
235 }
236 }
237
238 /// Returns true if and only if this matcher has zero selections.
239 pub fn is_empty(&self) -> bool {
240 self.selections.is_empty()
241 }
242
243 /// Returns the number of selections used in this matcher.
244 pub fn len(&self) -> usize {
245 self.selections.len()
246 }
247
248 /// Return the set of current file type definitions.
249 ///
250 /// Definitions and globs are sorted.
251 pub fn definitions(&self) -> &[FileTypeDef] {
252 &self.defs
253 }
254
255 /// Returns a match for the given path against this file type matcher.
256 ///
257 /// The path is considered whitelisted if it matches a selected file type.
258 /// The path is considered ignored if it matches a negated file type.
259 /// If at least one file type is selected and `path` doesn't match, then
260 /// the path is also considered ignored.
261 pub fn matched<'a, P: AsRef<Path>>(
262 &'a self,
263 path: P,
264 is_dir: bool,
265 ) -> Match<Glob<'a>> {
266 // File types don't apply to directories, and we can't do anything
267 // if our glob set is empty.
268 if is_dir || self.set.is_empty() {
269 return Match::None;
270 }
271 // We only want to match against the file name, so extract it.
272 // If one doesn't exist, then we can't match it.
273 let name = match file_name(path.as_ref()) {
274 Some(name) => name,
275 None if self.has_selected => {
276 return Match::Ignore(Glob::unmatched());
277 }
278 None => {
279 return Match::None;
280 }
281 };
282 let mut matches = self.matches.get();
283 self.set.matches_into(name, &mut *matches);
284 // The highest precedent match is the last one.
285 if let Some(&i) = matches.last() {
286 let (isel, _) = self.glob_to_selection[i];
287 let sel = &self.selections[isel];
288 let glob = Glob(GlobInner::Matched { def: sel.inner() });
289 return if sel.is_negated() {
290 Match::Ignore(glob)
291 } else {
292 Match::Whitelist(glob)
293 };
294 }
295 if self.has_selected {
296 Match::Ignore(Glob::unmatched())
297 } else {
298 Match::None
299 }
300 }
301}
302
/// TypesBuilder builds a type matcher from a set of file type definitions and
/// a set of file type selections.
pub struct TypesBuilder {
    /// The file type definitions added so far, keyed by file type name.
    types: HashMap<String, FileTypeDef>,
    /// The selections and negations made so far, in the order they were
    /// requested. They carry only a name here; `build` looks each name up
    /// in `types`.
    selections: Vec<Selection<()>>,
}
309
310impl TypesBuilder {
311 /// Create a new builder for a file type matcher.
312 ///
313 /// The builder contains *no* type definitions to start with. A set
314 /// of default type definitions can be added with `add_defaults`, and
315 /// additional type definitions can be added with `select` and `negate`.
316 pub fn new() -> TypesBuilder {
317 TypesBuilder { types: HashMap::new(), selections: vec![] }
318 }
319
320 /// Build the current set of file type definitions *and* selections into
321 /// a file type matcher.
322 pub fn build(&self) -> Result<Types, Error> {
323 let defs = self.definitions();
324 let has_selected = self.selections.iter().any(|s| !s.is_negated());
325
326 let mut selections = vec![];
327 let mut glob_to_selection = vec![];
328 let mut build_set = GlobSetBuilder::new();
329 for (isel, selection) in self.selections.iter().enumerate() {
330 let def = match self.types.get(selection.name()) {
331 Some(def) => def.clone(),
332 None => {
333 let name = selection.name().to_string();
334 return Err(Error::UnrecognizedFileType(name));
335 }
336 };
337 for (iglob, glob) in def.globs.iter().enumerate() {
338 build_set.add(
339 GlobBuilder::new(glob)
340 .literal_separator(true)
341 .build()
342 .map_err(|err| Error::Glob {
343 glob: Some(glob.to_string()),
344 err: err.kind().to_string(),
345 })?,
346 );
347 glob_to_selection.push((isel, iglob));
348 }
349 selections.push(selection.clone().map(move |_| def));
350 }
351 let set = build_set
352 .build()
353 .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?;
354 Ok(Types {
355 defs,
356 selections,
357 has_selected,
358 glob_to_selection,
359 set,
360 matches: Arc::new(Pool::new(|| vec![])),
361 })
362 }
363
364 /// Return the set of current file type definitions.
365 ///
366 /// Definitions and globs are sorted.
367 pub fn definitions(&self) -> Vec<FileTypeDef> {
368 let mut defs = vec![];
369 for def in self.types.values() {
370 let mut def = def.clone();
371 def.globs.sort();
372 defs.push(def);
373 }
374 defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
375 defs
376 }
377
378 /// Select the file type given by `name`.
379 ///
380 /// If `name` is `all`, then all file types currently defined are selected.
381 pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
382 if name == "all" {
383 for name in self.types.keys() {
384 self.selections.push(Selection::Select(name.to_string(), ()));
385 }
386 } else {
387 self.selections.push(Selection::Select(name.to_string(), ()));
388 }
389 self
390 }
391
392 /// Ignore the file type given by `name`.
393 ///
394 /// If `name` is `all`, then all file types currently defined are negated.
395 pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
396 if name == "all" {
397 for name in self.types.keys() {
398 self.selections.push(Selection::Negate(name.to_string(), ()));
399 }
400 } else {
401 self.selections.push(Selection::Negate(name.to_string(), ()));
402 }
403 self
404 }
405
406 /// Clear any file type definitions for the type name given.
407 pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
408 self.types.remove(name);
409 self
410 }
411
412 /// Add a new file type definition. `name` can be arbitrary and `pat`
413 /// should be a glob recognizing file paths belonging to the `name` type.
414 ///
415 /// If `name` is `all` or otherwise contains any character that is not a
416 /// Unicode letter or number, then an error is returned.
417 pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
418 if name == "all" || !name.chars().all(|c| c.is_alphanumeric()) {
419 return Err(Error::InvalidDefinition);
420 }
421 let (key, glob) = (name.to_string(), glob.to_string());
422 self.types
423 .entry(key)
424 .or_insert_with(|| FileTypeDef {
425 name: name.to_string(),
426 globs: vec![],
427 })
428 .globs
429 .push(glob);
430 Ok(())
431 }
432
433 /// Add a new file type definition specified in string form. There are two
434 /// valid formats:
435 /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the
436 /// given name with the given glob.
437 /// 2. `{name}:include:{comma-separated list of already defined names}.
438 /// This defines an 'include' definition that associates the given name
439 /// with the definitions of the given existing types.
440 /// Names may not include any characters that are not
441 /// Unicode letters or numbers.
442 pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
443 let parts: Vec<&str> = def.split(':').collect();
444 match parts.len() {
445 2 => {
446 let name = parts[0];
447 let glob = parts[1];
448 if name.is_empty() || glob.is_empty() {
449 return Err(Error::InvalidDefinition);
450 }
451 self.add(name, glob)
452 }
453 3 => {
454 let name = parts[0];
455 let types_string = parts[2];
456 if name.is_empty()
457 || parts[1] != "include"
458 || types_string.is_empty()
459 {
460 return Err(Error::InvalidDefinition);
461 }
462 let types = types_string.split(',');
463 // Check ahead of time to ensure that all types specified are
464 // present and fail fast if not.
465 if types.clone().any(|t| !self.types.contains_key(t)) {
466 return Err(Error::InvalidDefinition);
467 }
468 for type_name in types {
469 let globs =
470 self.types.get(type_name).unwrap().globs.clone();
471 for glob in globs {
472 self.add(name, &glob)?;
473 }
474 }
475 Ok(())
476 }
477 _ => Err(Error::InvalidDefinition),
478 }
479 }
480
481 /// Add a set of default file type definitions.
482 pub fn add_defaults(&mut self) -> &mut TypesBuilder {
483 static MSG: &'static str = "adding a default type should never fail";
484 for &(names, exts) in DEFAULT_TYPES {
485 for name in names {
486 for ext in exts {
487 self.add(name, ext).expect(MSG);
488 }
489 }
490 }
491 self
492 }
493}
494
// Unit tests for building a matcher from string definitions and for the
// outcome of matching file names against selected and negated types.
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` function per invocation.
    //
    // Arguments: test name, type definitions (fed to `add_def`), names to
    // select, names to negate, and the path to match. The generated test
    // asserts whether the path is *not ignored*; a path with no match at all
    // therefore counts as "matched".
    //
    // A leading `not` token flips the expectation: the path must be ignored.
    macro_rules! matched {
        // Positive form: expect the path not to be ignored.
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        // Negative form: expect the path to be ignored.
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        // Worker arm that both forms above expand to.
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut btypes = TypesBuilder::new();
                for tydef in $types {
                    btypes.add_def(tydef).unwrap();
                }
                for sel in $sel {
                    btypes.select(sel);
                }
                for selnot in $selnot {
                    btypes.negate(selnot);
                }
                let types = btypes.build().unwrap();
                let mat = types.matched($path, false);
                assert_eq!($matched, !mat.is_ignore());
            }
        };
    }

    // The type definitions shared by every test below, including one
    // `include` definition (`combo`) built from `html` and `rust`.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "py:*.py",
            "python:*.py",
            "foo:*.{rs,foo}",
            "combo:include:html,rust",
        ]
    }

    // Paths that must not be ignored.
    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
    matched!(match8, types(), vec!["combo"], vec![], "index.html");
    matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
    matched!(match10, types(), vec!["py"], vec![], "main.py");
    matched!(match11, types(), vec!["python"], vec![], "main.py");

    // Paths that must be ignored.
    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
    matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
    matched!(not, matchnot7, types(), vec!["py"], vec![], "index.html");
    matched!(not, matchnot8, types(), vec!["python"], vec![], "doc.md");

    // Malformed definitions must be rejected and must leave the builder's
    // existing definitions untouched.
    #[test]
    fn test_invalid_defs() {
        let mut btypes = TypesBuilder::new();
        for tydef in types() {
            btypes.add_def(tydef).unwrap();
        }
        // Preserve the original definitions for later comparison.
        let original_defs = btypes.definitions();
        let bad_defs = vec![
            // Reference to type that does not exist
            "combo:include:html,qwerty",
            // Bad format
            "combo:foobar:html,rust",
            "",
        ];
        for def in bad_defs {
            assert!(btypes.add_def(def).is_err());
            // Ensure that nothing changed, even if some of the includes were valid.
            assert_eq!(btypes.definitions(), original_defs);
        }
    }
}
585