1 | /*! |
2 | The types module provides a way of associating globs on file names to file |
3 | types. |
4 | |
5 | This can be used to match specific types of files. For example, among |
6 | the default file types provided, the Rust file type is defined to be `*.rs` |
7 | with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with |
8 | name `c`. |
9 | |
10 | Note that the set of default types may change over time. |
11 | |
12 | # Example |
13 | |
14 | This shows how to create and use a simple file type matcher using the default |
15 | file types defined in this crate. |
16 | |
17 | ``` |
18 | use ignore::types::TypesBuilder; |
19 | |
20 | let mut builder = TypesBuilder::new(); |
21 | builder.add_defaults(); |
22 | builder.select("rust" ); |
23 | let matcher = builder.build().unwrap(); |
24 | |
25 | assert!(matcher.matched("foo.rs" , false).is_whitelist()); |
26 | assert!(matcher.matched("foo.c" , false).is_ignore()); |
27 | ``` |
28 | |
29 | # Example: negation |
30 | |
31 | This is like the previous example, but shows how negating a file type works. |
32 | That is, this will let us match file paths that *don't* correspond to a |
33 | particular file type. |
34 | |
35 | ``` |
36 | use ignore::types::TypesBuilder; |
37 | |
38 | let mut builder = TypesBuilder::new(); |
39 | builder.add_defaults(); |
40 | builder.negate("c" ); |
41 | let matcher = builder.build().unwrap(); |
42 | |
43 | assert!(matcher.matched("foo.rs" , false).is_none()); |
44 | assert!(matcher.matched("foo.c" , false).is_ignore()); |
45 | ``` |
46 | |
47 | # Example: custom file type definitions |
48 | |
This shows how to extend this library's default file type definitions with
your own.
51 | |
52 | ``` |
53 | use ignore::types::TypesBuilder; |
54 | |
55 | let mut builder = TypesBuilder::new(); |
56 | builder.add_defaults(); |
57 | builder.add("foo" , "*.foo" ); |
58 | // Another way of adding a file type definition. |
59 | // This is useful when accepting input from an end user. |
60 | builder.add_def("bar:*.bar" ); |
61 | // Note: we only select `foo`, not `bar`. |
62 | builder.select("foo" ); |
63 | let matcher = builder.build().unwrap(); |
64 | |
65 | assert!(matcher.matched("x.foo" , false).is_whitelist()); |
66 | // This is ignored because we only selected the `foo` file type. |
67 | assert!(matcher.matched("x.bar" , false).is_ignore()); |
68 | ``` |
69 | |
70 | We can also add file type definitions based on other definitions. |
71 | |
72 | ``` |
73 | use ignore::types::TypesBuilder; |
74 | |
75 | let mut builder = TypesBuilder::new(); |
76 | builder.add_defaults(); |
77 | builder.add("foo" , "*.foo" ); |
78 | builder.add_def("bar:include:foo,cpp" ); |
79 | builder.select("bar" ); |
80 | let matcher = builder.build().unwrap(); |
81 | |
82 | assert!(matcher.matched("x.foo" , false).is_whitelist()); |
83 | assert!(matcher.matched("y.cpp" , false).is_whitelist()); |
84 | ``` |
85 | */ |
86 | |
87 | use std::{collections::HashMap, path::Path, sync::Arc}; |
88 | |
89 | use { |
90 | globset::{GlobBuilder, GlobSet, GlobSetBuilder}, |
91 | regex_automata::util::pool::Pool, |
92 | }; |
93 | |
94 | use crate::{default_types::DEFAULT_TYPES, pathutil::file_name, Error, Match}; |
95 | |
96 | /// Glob represents a single glob in a set of file type definitions. |
97 | /// |
98 | /// There may be more than one glob for a particular file type. |
99 | /// |
100 | /// This is used to report information about the highest precedent glob |
101 | /// that matched. |
102 | /// |
103 | /// Note that not all matches necessarily correspond to a specific glob. |
104 | /// For example, if there are one or more selections and a file path doesn't |
105 | /// match any of those selections, then the file path is considered to be |
106 | /// ignored. |
107 | /// |
108 | /// The lifetime `'a` refers to the lifetime of the underlying file type |
109 | /// definition, which corresponds to the lifetime of the file type matcher. |
110 | #[derive (Clone, Debug)] |
111 | pub struct Glob<'a>(GlobInner<'a>); |
112 | |
113 | #[derive (Clone, Debug)] |
114 | enum GlobInner<'a> { |
115 | /// No glob matched, but the file path should still be ignored. |
116 | UnmatchedIgnore, |
117 | /// A glob matched. |
118 | Matched { |
119 | /// The file type definition which provided the glob. |
120 | def: &'a FileTypeDef, |
121 | }, |
122 | } |
123 | |
124 | impl<'a> Glob<'a> { |
125 | fn unmatched() -> Glob<'a> { |
126 | Glob(GlobInner::UnmatchedIgnore) |
127 | } |
128 | |
129 | /// Return the file type definition that matched, if one exists. A file type |
130 | /// definition always exists when a specific definition matches a file |
131 | /// path. |
132 | pub fn file_type_def(&self) -> Option<&FileTypeDef> { |
133 | match self { |
134 | Glob(GlobInner::UnmatchedIgnore) => None, |
135 | Glob(GlobInner::Matched { def: &&FileTypeDef, .. }) => Some(def), |
136 | } |
137 | } |
138 | } |
139 | |
/// A single file type definition.
///
/// File type definitions can be retrieved in aggregate from a file type
/// matcher. A file type definition is also reported whenever it is
/// responsible for a match.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef {
    /// The name of this file type.
    name: String,
    /// The glob patterns used to recognize this file type.
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Return the globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        self.globs.as_slice()
    }
}
162 | |
/// Types is a file type matcher.
///
/// A matcher is produced by `TypesBuilder::build` (or `Types::empty`) and is
/// queried with `Types::matched`.
#[derive (Clone, Debug)]
pub struct Types {
    /// All of the file type definitions, sorted lexicographically by name.
    defs: Vec<FileTypeDef>,
    /// All of the selections made by the user. Each selection carries a copy
    /// of the file type definition it refers to.
    selections: Vec<Selection<FileTypeDef>>,
    /// Whether there is at least one Selection::Select in our selections.
    /// When this is true, a Match::None is converted to Match::Ignore.
    has_selected: bool,
    /// A mapping from glob index in the set to two indices. The first is an
    /// index into `selections` and the second is an index into the
    /// corresponding file type definition's list of globs.
    glob_to_selection: Vec<(usize, usize)>,
    /// The set of all glob selections, used for actual matching.
    set: GlobSet,
    /// Temporary storage for globs that match. `matched` takes a buffer from
    /// this pool and fills it with the indices of the matching globs.
    matches: Arc<Pool<Vec<usize>>>,
}
182 | |
/// Indicates the type of a selection for a particular file type.
///
/// Both variants carry the name of the file type followed by a payload of
/// type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    /// The named file type was selected.
    Select(String, T),
    /// The named file type was negated.
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true if and only if this selection is a negation.
    fn is_negated(&self) -> bool {
        matches!(self, Selection::Negate(..))
    }

    /// Returns the file type name this selection refers to.
    fn name(&self) -> &str {
        match self {
            Selection::Select(name, _) | Selection::Negate(name, _) => name,
        }
    }

    /// Converts the payload with `f`, keeping the variant and the name.
    fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
        match self {
            Selection::Select(name, payload) => Selection::Select(name, f(payload)),
            Selection::Negate(name, payload) => Selection::Negate(name, f(payload)),
        }
    }

    /// Returns a reference to the payload of this selection.
    fn inner(&self) -> &T {
        match self {
            Selection::Select(_, payload) | Selection::Negate(_, payload) => payload,
        }
    }
}
223 | |
224 | impl Types { |
225 | /// Creates a new file type matcher that never matches any path and |
226 | /// contains no file type definitions. |
227 | pub fn empty() -> Types { |
228 | Types { |
229 | defs: vec![], |
230 | selections: vec![], |
231 | has_selected: false, |
232 | glob_to_selection: vec![], |
233 | set: GlobSetBuilder::new().build().unwrap(), |
234 | matches: Arc::new(Pool::new(|| vec![])), |
235 | } |
236 | } |
237 | |
238 | /// Returns true if and only if this matcher has zero selections. |
239 | pub fn is_empty(&self) -> bool { |
240 | self.selections.is_empty() |
241 | } |
242 | |
243 | /// Returns the number of selections used in this matcher. |
244 | pub fn len(&self) -> usize { |
245 | self.selections.len() |
246 | } |
247 | |
248 | /// Return the set of current file type definitions. |
249 | /// |
250 | /// Definitions and globs are sorted. |
251 | pub fn definitions(&self) -> &[FileTypeDef] { |
252 | &self.defs |
253 | } |
254 | |
255 | /// Returns a match for the given path against this file type matcher. |
256 | /// |
257 | /// The path is considered whitelisted if it matches a selected file type. |
258 | /// The path is considered ignored if it matches a negated file type. |
259 | /// If at least one file type is selected and `path` doesn't match, then |
260 | /// the path is also considered ignored. |
261 | pub fn matched<'a, P: AsRef<Path>>( |
262 | &'a self, |
263 | path: P, |
264 | is_dir: bool, |
265 | ) -> Match<Glob<'a>> { |
266 | // File types don't apply to directories, and we can't do anything |
267 | // if our glob set is empty. |
268 | if is_dir || self.set.is_empty() { |
269 | return Match::None; |
270 | } |
271 | // We only want to match against the file name, so extract it. |
272 | // If one doesn't exist, then we can't match it. |
273 | let name = match file_name(path.as_ref()) { |
274 | Some(name) => name, |
275 | None if self.has_selected => { |
276 | return Match::Ignore(Glob::unmatched()); |
277 | } |
278 | None => { |
279 | return Match::None; |
280 | } |
281 | }; |
282 | let mut matches = self.matches.get(); |
283 | self.set.matches_into(name, &mut *matches); |
284 | // The highest precedent match is the last one. |
285 | if let Some(&i) = matches.last() { |
286 | let (isel, _) = self.glob_to_selection[i]; |
287 | let sel = &self.selections[isel]; |
288 | let glob = Glob(GlobInner::Matched { def: sel.inner() }); |
289 | return if sel.is_negated() { |
290 | Match::Ignore(glob) |
291 | } else { |
292 | Match::Whitelist(glob) |
293 | }; |
294 | } |
295 | if self.has_selected { |
296 | Match::Ignore(Glob::unmatched()) |
297 | } else { |
298 | Match::None |
299 | } |
300 | } |
301 | } |
302 | |
/// TypesBuilder builds a type matcher from a set of file type definitions and
/// a set of file type selections.
pub struct TypesBuilder {
    /// The file type definitions added so far, keyed by file type name.
    types: HashMap<String, FileTypeDef>,
    /// The selections and negations in the order they were pushed. They carry
    /// no payload here; `build` attaches the matching definition to each one.
    selections: Vec<Selection<()>>,
}
309 | |
310 | impl TypesBuilder { |
311 | /// Create a new builder for a file type matcher. |
312 | /// |
313 | /// The builder contains *no* type definitions to start with. A set |
314 | /// of default type definitions can be added with `add_defaults`, and |
315 | /// additional type definitions can be added with `select` and `negate`. |
316 | pub fn new() -> TypesBuilder { |
317 | TypesBuilder { types: HashMap::new(), selections: vec![] } |
318 | } |
319 | |
320 | /// Build the current set of file type definitions *and* selections into |
321 | /// a file type matcher. |
322 | pub fn build(&self) -> Result<Types, Error> { |
323 | let defs = self.definitions(); |
324 | let has_selected = self.selections.iter().any(|s| !s.is_negated()); |
325 | |
326 | let mut selections = vec![]; |
327 | let mut glob_to_selection = vec![]; |
328 | let mut build_set = GlobSetBuilder::new(); |
329 | for (isel, selection) in self.selections.iter().enumerate() { |
330 | let def = match self.types.get(selection.name()) { |
331 | Some(def) => def.clone(), |
332 | None => { |
333 | let name = selection.name().to_string(); |
334 | return Err(Error::UnrecognizedFileType(name)); |
335 | } |
336 | }; |
337 | for (iglob, glob) in def.globs.iter().enumerate() { |
338 | build_set.add( |
339 | GlobBuilder::new(glob) |
340 | .literal_separator(true) |
341 | .build() |
342 | .map_err(|err| Error::Glob { |
343 | glob: Some(glob.to_string()), |
344 | err: err.kind().to_string(), |
345 | })?, |
346 | ); |
347 | glob_to_selection.push((isel, iglob)); |
348 | } |
349 | selections.push(selection.clone().map(move |_| def)); |
350 | } |
351 | let set = build_set |
352 | .build() |
353 | .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?; |
354 | Ok(Types { |
355 | defs, |
356 | selections, |
357 | has_selected, |
358 | glob_to_selection, |
359 | set, |
360 | matches: Arc::new(Pool::new(|| vec![])), |
361 | }) |
362 | } |
363 | |
364 | /// Return the set of current file type definitions. |
365 | /// |
366 | /// Definitions and globs are sorted. |
367 | pub fn definitions(&self) -> Vec<FileTypeDef> { |
368 | let mut defs = vec![]; |
369 | for def in self.types.values() { |
370 | let mut def = def.clone(); |
371 | def.globs.sort(); |
372 | defs.push(def); |
373 | } |
374 | defs.sort_by(|def1, def2| def1.name().cmp(def2.name())); |
375 | defs |
376 | } |
377 | |
378 | /// Select the file type given by `name`. |
379 | /// |
380 | /// If `name` is `all`, then all file types currently defined are selected. |
381 | pub fn select(&mut self, name: &str) -> &mut TypesBuilder { |
382 | if name == "all" { |
383 | for name in self.types.keys() { |
384 | self.selections.push(Selection::Select(name.to_string(), ())); |
385 | } |
386 | } else { |
387 | self.selections.push(Selection::Select(name.to_string(), ())); |
388 | } |
389 | self |
390 | } |
391 | |
392 | /// Ignore the file type given by `name`. |
393 | /// |
394 | /// If `name` is `all`, then all file types currently defined are negated. |
395 | pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { |
396 | if name == "all" { |
397 | for name in self.types.keys() { |
398 | self.selections.push(Selection::Negate(name.to_string(), ())); |
399 | } |
400 | } else { |
401 | self.selections.push(Selection::Negate(name.to_string(), ())); |
402 | } |
403 | self |
404 | } |
405 | |
406 | /// Clear any file type definitions for the type name given. |
407 | pub fn clear(&mut self, name: &str) -> &mut TypesBuilder { |
408 | self.types.remove(name); |
409 | self |
410 | } |
411 | |
412 | /// Add a new file type definition. `name` can be arbitrary and `pat` |
413 | /// should be a glob recognizing file paths belonging to the `name` type. |
414 | /// |
415 | /// If `name` is `all` or otherwise contains any character that is not a |
416 | /// Unicode letter or number, then an error is returned. |
417 | pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> { |
418 | if name == "all" || !name.chars().all(|c| c.is_alphanumeric()) { |
419 | return Err(Error::InvalidDefinition); |
420 | } |
421 | let (key, glob) = (name.to_string(), glob.to_string()); |
422 | self.types |
423 | .entry(key) |
424 | .or_insert_with(|| FileTypeDef { |
425 | name: name.to_string(), |
426 | globs: vec![], |
427 | }) |
428 | .globs |
429 | .push(glob); |
430 | Ok(()) |
431 | } |
432 | |
433 | /// Add a new file type definition specified in string form. There are two |
434 | /// valid formats: |
435 | /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the |
436 | /// given name with the given glob. |
437 | /// 2. `{name}:include:{comma-separated list of already defined names}. |
438 | /// This defines an 'include' definition that associates the given name |
439 | /// with the definitions of the given existing types. |
440 | /// Names may not include any characters that are not |
441 | /// Unicode letters or numbers. |
442 | pub fn add_def(&mut self, def: &str) -> Result<(), Error> { |
443 | let parts: Vec<&str> = def.split(':' ).collect(); |
444 | match parts.len() { |
445 | 2 => { |
446 | let name = parts[0]; |
447 | let glob = parts[1]; |
448 | if name.is_empty() || glob.is_empty() { |
449 | return Err(Error::InvalidDefinition); |
450 | } |
451 | self.add(name, glob) |
452 | } |
453 | 3 => { |
454 | let name = parts[0]; |
455 | let types_string = parts[2]; |
456 | if name.is_empty() |
457 | || parts[1] != "include" |
458 | || types_string.is_empty() |
459 | { |
460 | return Err(Error::InvalidDefinition); |
461 | } |
462 | let types = types_string.split(',' ); |
463 | // Check ahead of time to ensure that all types specified are |
464 | // present and fail fast if not. |
465 | if types.clone().any(|t| !self.types.contains_key(t)) { |
466 | return Err(Error::InvalidDefinition); |
467 | } |
468 | for type_name in types { |
469 | let globs = |
470 | self.types.get(type_name).unwrap().globs.clone(); |
471 | for glob in globs { |
472 | self.add(name, &glob)?; |
473 | } |
474 | } |
475 | Ok(()) |
476 | } |
477 | _ => Err(Error::InvalidDefinition), |
478 | } |
479 | } |
480 | |
481 | /// Add a set of default file type definitions. |
482 | pub fn add_defaults(&mut self) -> &mut TypesBuilder { |
483 | static MSG: &'static str = "adding a default type should never fail" ; |
484 | for &(names, exts) in DEFAULT_TYPES { |
485 | for name in names { |
486 | for ext in exts { |
487 | self.add(name, ext).expect(MSG); |
488 | } |
489 | } |
490 | } |
491 | self |
492 | } |
493 | } |
494 | |
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` function per invocation. The test builds a
    // matcher from the given definitions, selections and negations, then
    // checks whether the path is *not* ignored. The plain form expects the
    // path to pass; the form prefixed with `not` expects it to be ignored.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut builder = TypesBuilder::new();
                for def in $types {
                    builder.add_def(def).unwrap();
                }
                for selected in $sel {
                    builder.select(selected);
                }
                for negated in $selnot {
                    builder.negate(negated);
                }
                let matcher = builder.build().unwrap();
                let verdict = matcher.matched($path, false);
                assert_eq!($matched, !verdict.is_ignore());
            }
        };
    }

    // The file type definitions shared by every test in this module.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "py:*.py",
            "python:*.py",
            "foo:*.{rs,foo}",
            "combo:include:html,rust",
        ]
    }

    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
    matched!(match8, types(), vec!["combo"], vec![], "index.html");
    matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
    matched!(match10, types(), vec!["py"], vec![], "main.py");
    matched!(match11, types(), vec!["python"], vec![], "main.py");

    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
    matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
    matched!(not, matchnot7, types(), vec!["py"], vec![], "index.html");
    matched!(not, matchnot8, types(), vec!["python"], vec![], "doc.md");

    #[test]
    fn test_invalid_defs() {
        let mut builder = TypesBuilder::new();
        for def in types() {
            builder.add_def(def).unwrap();
        }
        // Snapshot the valid definitions so each rejection can be checked
        // against them.
        let baseline = builder.definitions();
        let rejected = vec![
            // Reference to type that does not exist
            "combo:include:html,qwerty",
            // Bad format
            "combo:foobar:html,rust",
            "",
        ];
        for def in rejected {
            assert!(builder.add_def(def).is_err());
            // Ensure that nothing changed, even if some of the includes were valid.
            assert_eq!(builder.definitions(), baseline);
        }
    }
}
585 | |