1 | /*! |
2 | The types module provides a way of associating globs on file names to file |
3 | types. |
4 | |
5 | This can be used to match specific types of files. For example, among |
6 | the default file types provided, the Rust file type is defined to be `*.rs` |
7 | with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with |
8 | name `c`. |
9 | |
10 | Note that the set of default types may change over time. |
11 | |
12 | # Example |
13 | |
14 | This shows how to create and use a simple file type matcher using the default |
15 | file types defined in this crate. |
16 | |
17 | ``` |
18 | use ignore::types::TypesBuilder; |
19 | |
20 | let mut builder = TypesBuilder::new(); |
21 | builder.add_defaults(); |
22 | builder.select("rust" ); |
23 | let matcher = builder.build().unwrap(); |
24 | |
25 | assert!(matcher.matched("foo.rs" , false).is_whitelist()); |
26 | assert!(matcher.matched("foo.c" , false).is_ignore()); |
27 | ``` |
28 | |
29 | # Example: negation |
30 | |
31 | This is like the previous example, but shows how negating a file type works. |
32 | That is, this will let us match file paths that *don't* correspond to a |
33 | particular file type. |
34 | |
35 | ``` |
36 | use ignore::types::TypesBuilder; |
37 | |
38 | let mut builder = TypesBuilder::new(); |
39 | builder.add_defaults(); |
40 | builder.negate("c" ); |
41 | let matcher = builder.build().unwrap(); |
42 | |
43 | assert!(matcher.matched("foo.rs" , false).is_none()); |
44 | assert!(matcher.matched("foo.c" , false).is_ignore()); |
45 | ``` |
46 | |
47 | # Example: custom file type definitions |
48 | |
49 | This shows how to extend this library's default file type definitions with |
50 | your own. |
51 | |
52 | ``` |
53 | use ignore::types::TypesBuilder; |
54 | |
55 | let mut builder = TypesBuilder::new(); |
56 | builder.add_defaults(); |
57 | builder.add("foo" , "*.foo" ); |
58 | // Another way of adding a file type definition. |
59 | // This is useful when accepting input from an end user. |
60 | builder.add_def("bar:*.bar" ); |
61 | // Note: we only select `foo`, not `bar`. |
62 | builder.select("foo" ); |
63 | let matcher = builder.build().unwrap(); |
64 | |
65 | assert!(matcher.matched("x.foo" , false).is_whitelist()); |
66 | // This is ignored because we only selected the `foo` file type. |
67 | assert!(matcher.matched("x.bar" , false).is_ignore()); |
68 | ``` |
69 | |
70 | We can also add file type definitions based on other definitions. |
71 | |
72 | ``` |
73 | use ignore::types::TypesBuilder; |
74 | |
75 | let mut builder = TypesBuilder::new(); |
76 | builder.add_defaults(); |
77 | builder.add("foo" , "*.foo" ); |
78 | builder.add_def("bar:include:foo,cpp" ); |
79 | builder.select("bar" ); |
80 | let matcher = builder.build().unwrap(); |
81 | |
82 | assert!(matcher.matched("x.foo" , false).is_whitelist()); |
83 | assert!(matcher.matched("y.cpp" , false).is_whitelist()); |
84 | ``` |
85 | */ |
86 | |
87 | use std::cell::RefCell; |
88 | use std::collections::HashMap; |
89 | use std::path::Path; |
90 | use std::sync::Arc; |
91 | |
92 | use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; |
93 | use regex::Regex; |
94 | use thread_local::ThreadLocal; |
95 | |
96 | use crate::default_types::DEFAULT_TYPES; |
97 | use crate::pathutil::file_name; |
98 | use crate::{Error, Match}; |
99 | |
100 | /// Glob represents a single glob in a set of file type definitions. |
101 | /// |
102 | /// There may be more than one glob for a particular file type. |
103 | /// |
104 | /// This is used to report information about the highest precedent glob |
105 | /// that matched. |
106 | /// |
107 | /// Note that not all matches necessarily correspond to a specific glob. |
108 | /// For example, if there are one or more selections and a file path doesn't |
109 | /// match any of those selections, then the file path is considered to be |
110 | /// ignored. |
111 | /// |
112 | /// The lifetime `'a` refers to the lifetime of the underlying file type |
113 | /// definition, which corresponds to the lifetime of the file type matcher. |
114 | #[derive (Clone, Debug)] |
115 | pub struct Glob<'a>(GlobInner<'a>); |
116 | |
117 | #[derive (Clone, Debug)] |
118 | enum GlobInner<'a> { |
119 | /// No glob matched, but the file path should still be ignored. |
120 | UnmatchedIgnore, |
121 | /// A glob matched. |
122 | Matched { |
123 | /// The file type definition which provided the glob. |
124 | def: &'a FileTypeDef, |
125 | }, |
126 | } |
127 | |
128 | impl<'a> Glob<'a> { |
129 | fn unmatched() -> Glob<'a> { |
130 | Glob(GlobInner::UnmatchedIgnore) |
131 | } |
132 | |
133 | /// Return the file type definition that matched, if one exists. A file type |
134 | /// definition always exists when a specific definition matches a file |
135 | /// path. |
136 | pub fn file_type_def(&self) -> Option<&FileTypeDef> { |
137 | match self { |
138 | Glob(GlobInner::UnmatchedIgnore) => None, |
139 | Glob(GlobInner::Matched { def: &&FileTypeDef, .. }) => Some(def), |
140 | } |
141 | } |
142 | } |
143 | |
/// A single file type definition: a type name plus the globs that recognize
/// files of that type.
///
/// File type definitions can be retrieved in aggregate from a file type
/// matcher. A file type definition is also reported when it is responsible
/// for a match.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef {
    /// The name of the file type, e.g. `rust`.
    name: String,
    /// Every glob registered under `name`.
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Return the globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        self.globs.as_slice()
    }
}
166 | |
167 | /// Types is a file type matcher. |
168 | #[derive (Clone, Debug)] |
169 | pub struct Types { |
170 | /// All of the file type definitions, sorted lexicographically by name. |
171 | defs: Vec<FileTypeDef>, |
172 | /// All of the selections made by the user. |
173 | selections: Vec<Selection<FileTypeDef>>, |
174 | /// Whether there is at least one Selection::Select in our selections. |
175 | /// When this is true, a Match::None is converted to Match::Ignore. |
176 | has_selected: bool, |
177 | /// A mapping from glob index in the set to two indices. The first is an |
178 | /// index into `selections` and the second is an index into the |
179 | /// corresponding file type definition's list of globs. |
180 | glob_to_selection: Vec<(usize, usize)>, |
181 | /// The set of all glob selections, used for actual matching. |
182 | set: GlobSet, |
183 | /// Temporary storage for globs that match. |
184 | matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>, |
185 | } |
186 | |
/// Indicates the type of a selection for a particular file type.
///
/// Both variants carry the file type name and a payload of type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    /// The named file type was selected.
    Select(String, T),
    /// The named file type was negated.
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true if and only if this is a `Negate` selection.
    fn is_negated(&self) -> bool {
        matches!(self, Selection::Negate(..))
    }

    /// Returns the file type name this selection refers to.
    fn name(&self) -> &str {
        match self {
            Selection::Select(name, _) | Selection::Negate(name, _) => name,
        }
    }

    /// Transforms the payload with `f`, keeping the variant and the name.
    fn map<U, F>(self, f: F) -> Selection<U>
    where
        F: FnOnce(T) -> U,
    {
        match self {
            Selection::Select(name, payload) => Selection::Select(name, f(payload)),
            Selection::Negate(name, payload) => Selection::Negate(name, f(payload)),
        }
    }

    /// Borrows the payload carried by this selection.
    fn inner(&self) -> &T {
        match self {
            Selection::Select(_, payload) | Selection::Negate(_, payload) => payload,
        }
    }
}
227 | |
228 | impl Types { |
229 | /// Creates a new file type matcher that never matches any path and |
230 | /// contains no file type definitions. |
231 | pub fn empty() -> Types { |
232 | Types { |
233 | defs: vec![], |
234 | selections: vec![], |
235 | has_selected: false, |
236 | glob_to_selection: vec![], |
237 | set: GlobSetBuilder::new().build().unwrap(), |
238 | matches: Arc::new(ThreadLocal::default()), |
239 | } |
240 | } |
241 | |
242 | /// Returns true if and only if this matcher has zero selections. |
243 | pub fn is_empty(&self) -> bool { |
244 | self.selections.is_empty() |
245 | } |
246 | |
247 | /// Returns the number of selections used in this matcher. |
248 | pub fn len(&self) -> usize { |
249 | self.selections.len() |
250 | } |
251 | |
252 | /// Return the set of current file type definitions. |
253 | /// |
254 | /// Definitions and globs are sorted. |
255 | pub fn definitions(&self) -> &[FileTypeDef] { |
256 | &self.defs |
257 | } |
258 | |
259 | /// Returns a match for the given path against this file type matcher. |
260 | /// |
261 | /// The path is considered whitelisted if it matches a selected file type. |
262 | /// The path is considered ignored if it matches a negated file type. |
263 | /// If at least one file type is selected and `path` doesn't match, then |
264 | /// the path is also considered ignored. |
265 | pub fn matched<'a, P: AsRef<Path>>( |
266 | &'a self, |
267 | path: P, |
268 | is_dir: bool, |
269 | ) -> Match<Glob<'a>> { |
270 | // File types don't apply to directories, and we can't do anything |
271 | // if our glob set is empty. |
272 | if is_dir || self.set.is_empty() { |
273 | return Match::None; |
274 | } |
275 | // We only want to match against the file name, so extract it. |
276 | // If one doesn't exist, then we can't match it. |
277 | let name = match file_name(path.as_ref()) { |
278 | Some(name) => name, |
279 | None if self.has_selected => { |
280 | return Match::Ignore(Glob::unmatched()); |
281 | } |
282 | None => { |
283 | return Match::None; |
284 | } |
285 | }; |
286 | let mut matches = self.matches.get_or_default().borrow_mut(); |
287 | self.set.matches_into(name, &mut *matches); |
288 | // The highest precedent match is the last one. |
289 | if let Some(&i) = matches.last() { |
290 | let (isel, _) = self.glob_to_selection[i]; |
291 | let sel = &self.selections[isel]; |
292 | let glob = Glob(GlobInner::Matched { def: sel.inner() }); |
293 | return if sel.is_negated() { |
294 | Match::Ignore(glob) |
295 | } else { |
296 | Match::Whitelist(glob) |
297 | }; |
298 | } |
299 | if self.has_selected { |
300 | Match::Ignore(Glob::unmatched()) |
301 | } else { |
302 | Match::None |
303 | } |
304 | } |
305 | } |
306 | |
307 | /// TypesBuilder builds a type matcher from a set of file type definitions and |
308 | /// a set of file type selections. |
309 | pub struct TypesBuilder { |
310 | types: HashMap<String, FileTypeDef>, |
311 | selections: Vec<Selection<()>>, |
312 | } |
313 | |
314 | impl TypesBuilder { |
315 | /// Create a new builder for a file type matcher. |
316 | /// |
317 | /// The builder contains *no* type definitions to start with. A set |
318 | /// of default type definitions can be added with `add_defaults`, and |
319 | /// additional type definitions can be added with `select` and `negate`. |
320 | pub fn new() -> TypesBuilder { |
321 | TypesBuilder { types: HashMap::new(), selections: vec![] } |
322 | } |
323 | |
324 | /// Build the current set of file type definitions *and* selections into |
325 | /// a file type matcher. |
326 | pub fn build(&self) -> Result<Types, Error> { |
327 | let defs = self.definitions(); |
328 | let has_selected = self.selections.iter().any(|s| !s.is_negated()); |
329 | |
330 | let mut selections = vec![]; |
331 | let mut glob_to_selection = vec![]; |
332 | let mut build_set = GlobSetBuilder::new(); |
333 | for (isel, selection) in self.selections.iter().enumerate() { |
334 | let def = match self.types.get(selection.name()) { |
335 | Some(def) => def.clone(), |
336 | None => { |
337 | let name = selection.name().to_string(); |
338 | return Err(Error::UnrecognizedFileType(name)); |
339 | } |
340 | }; |
341 | for (iglob, glob) in def.globs.iter().enumerate() { |
342 | build_set.add( |
343 | GlobBuilder::new(glob) |
344 | .literal_separator(true) |
345 | .build() |
346 | .map_err(|err| Error::Glob { |
347 | glob: Some(glob.to_string()), |
348 | err: err.kind().to_string(), |
349 | })?, |
350 | ); |
351 | glob_to_selection.push((isel, iglob)); |
352 | } |
353 | selections.push(selection.clone().map(move |_| def)); |
354 | } |
355 | let set = build_set |
356 | .build() |
357 | .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?; |
358 | Ok(Types { |
359 | defs: defs, |
360 | selections: selections, |
361 | has_selected: has_selected, |
362 | glob_to_selection: glob_to_selection, |
363 | set: set, |
364 | matches: Arc::new(ThreadLocal::default()), |
365 | }) |
366 | } |
367 | |
368 | /// Return the set of current file type definitions. |
369 | /// |
370 | /// Definitions and globs are sorted. |
371 | pub fn definitions(&self) -> Vec<FileTypeDef> { |
372 | let mut defs = vec![]; |
373 | for def in self.types.values() { |
374 | let mut def = def.clone(); |
375 | def.globs.sort(); |
376 | defs.push(def); |
377 | } |
378 | defs.sort_by(|def1, def2| def1.name().cmp(def2.name())); |
379 | defs |
380 | } |
381 | |
382 | /// Select the file type given by `name`. |
383 | /// |
384 | /// If `name` is `all`, then all file types currently defined are selected. |
385 | pub fn select(&mut self, name: &str) -> &mut TypesBuilder { |
386 | if name == "all" { |
387 | for name in self.types.keys() { |
388 | self.selections.push(Selection::Select(name.to_string(), ())); |
389 | } |
390 | } else { |
391 | self.selections.push(Selection::Select(name.to_string(), ())); |
392 | } |
393 | self |
394 | } |
395 | |
396 | /// Ignore the file type given by `name`. |
397 | /// |
398 | /// If `name` is `all`, then all file types currently defined are negated. |
399 | pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { |
400 | if name == "all" { |
401 | for name in self.types.keys() { |
402 | self.selections.push(Selection::Negate(name.to_string(), ())); |
403 | } |
404 | } else { |
405 | self.selections.push(Selection::Negate(name.to_string(), ())); |
406 | } |
407 | self |
408 | } |
409 | |
410 | /// Clear any file type definitions for the type name given. |
411 | pub fn clear(&mut self, name: &str) -> &mut TypesBuilder { |
412 | self.types.remove(name); |
413 | self |
414 | } |
415 | |
416 | /// Add a new file type definition. `name` can be arbitrary and `pat` |
417 | /// should be a glob recognizing file paths belonging to the `name` type. |
418 | /// |
419 | /// If `name` is `all` or otherwise contains any character that is not a |
420 | /// Unicode letter or number, then an error is returned. |
421 | pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> { |
422 | lazy_static::lazy_static! { |
423 | static ref RE: Regex = Regex::new(r"^[\pL\pN]+$" ).unwrap(); |
424 | }; |
425 | if name == "all" || !RE.is_match(name) { |
426 | return Err(Error::InvalidDefinition); |
427 | } |
428 | let (key, glob) = (name.to_string(), glob.to_string()); |
429 | self.types |
430 | .entry(key) |
431 | .or_insert_with(|| FileTypeDef { |
432 | name: name.to_string(), |
433 | globs: vec![], |
434 | }) |
435 | .globs |
436 | .push(glob); |
437 | Ok(()) |
438 | } |
439 | |
440 | /// Add a new file type definition specified in string form. There are two |
441 | /// valid formats: |
442 | /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the |
443 | /// given name with the given glob. |
444 | /// 2. `{name}:include:{comma-separated list of already defined names}. |
445 | /// This defines an 'include' definition that associates the given name |
446 | /// with the definitions of the given existing types. |
447 | /// Names may not include any characters that are not |
448 | /// Unicode letters or numbers. |
449 | pub fn add_def(&mut self, def: &str) -> Result<(), Error> { |
450 | let parts: Vec<&str> = def.split(':' ).collect(); |
451 | match parts.len() { |
452 | 2 => { |
453 | let name = parts[0]; |
454 | let glob = parts[1]; |
455 | if name.is_empty() || glob.is_empty() { |
456 | return Err(Error::InvalidDefinition); |
457 | } |
458 | self.add(name, glob) |
459 | } |
460 | 3 => { |
461 | let name = parts[0]; |
462 | let types_string = parts[2]; |
463 | if name.is_empty() |
464 | || parts[1] != "include" |
465 | || types_string.is_empty() |
466 | { |
467 | return Err(Error::InvalidDefinition); |
468 | } |
469 | let types = types_string.split(',' ); |
470 | // Check ahead of time to ensure that all types specified are |
471 | // present and fail fast if not. |
472 | if types.clone().any(|t| !self.types.contains_key(t)) { |
473 | return Err(Error::InvalidDefinition); |
474 | } |
475 | for type_name in types { |
476 | let globs = |
477 | self.types.get(type_name).unwrap().globs.clone(); |
478 | for glob in globs { |
479 | self.add(name, &glob)?; |
480 | } |
481 | } |
482 | Ok(()) |
483 | } |
484 | _ => Err(Error::InvalidDefinition), |
485 | } |
486 | } |
487 | |
488 | /// Add a set of default file type definitions. |
489 | pub fn add_defaults(&mut self) -> &mut TypesBuilder { |
490 | static MSG: &'static str = "adding a default type should never fail" ; |
491 | for &(name, exts) in DEFAULT_TYPES { |
492 | for ext in exts { |
493 | self.add(name, ext).expect(MSG); |
494 | } |
495 | } |
496 | self |
497 | } |
498 | } |
499 | |
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` per invocation. Each test registers the given
    // definitions, applies the selections and negations, and then checks
    // whether the path is "matched", meaning "not ignored". The five-argument
    // form expects a match; prefixing the arguments with `not` expects the
    // path to be ignored.
    macro_rules! matched {
        ($test_name:ident, $defs:expr, $selected:expr, $negated:expr,
         $path:expr) => {
            matched!($test_name, $defs, $selected, $negated, $path, true);
        };
        (not, $test_name:ident, $defs:expr, $selected:expr, $negated:expr,
         $path:expr) => {
            matched!($test_name, $defs, $selected, $negated, $path, false);
        };
        ($test_name:ident, $defs:expr, $selected:expr, $negated:expr,
         $path:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let mut builder = TypesBuilder::new();
                for definition in $defs {
                    builder.add_def(definition).unwrap();
                }
                for wanted in $selected {
                    builder.select(wanted);
                }
                for unwanted in $negated {
                    builder.negate(unwanted);
                }
                let matcher = builder.build().unwrap();
                let verdict = matcher.matched($path, false);
                assert_eq!($expected, !verdict.is_ignore());
            }
        };
    }

    // The definitions shared by every test in this module.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "foo:*.{rs,foo}",
            "combo:include:html,rust",
        ]
    }

    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
    matched!(match8, types(), vec!["combo"], vec![], "index.html");
    matched!(match9, types(), vec!["combo"], vec![], "lib.rs");

    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
    matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");

    #[test]
    fn test_invalid_defs() {
        let mut builder = TypesBuilder::new();
        for definition in types() {
            builder.add_def(definition).unwrap();
        }
        // Snapshot the definitions so each rejected input can be shown to
        // have left the builder untouched.
        let snapshot = builder.definitions();
        let rejected = vec![
            // Reference to type that does not exist
            "combo:include:html,python",
            // Bad format
            "combo:foobar:html,rust",
            "",
        ];
        for input in rejected {
            assert!(builder.add_def(input).is_err());
            // Ensure that nothing changed, even if some of the includes were valid.
            assert_eq!(builder.definitions(), snapshot);
        }
    }
}
584 | |