1 | /*! |
2 | The gitignore module provides a way to match globs from a gitignore file |
3 | against file paths. |
4 | |
5 | Note that this module implements the specification as described in the |
6 | `gitignore` man page from scratch. That is, this module does *not* shell out to |
7 | the `git` command line tool. |
8 | */ |
9 | |
10 | use std::{ |
11 | fs::File, |
12 | io::{BufRead, BufReader, Read}, |
13 | path::{Path, PathBuf}, |
14 | sync::Arc, |
15 | }; |
16 | |
17 | use { |
18 | globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder}, |
19 | regex_automata::util::pool::Pool, |
20 | }; |
21 | |
22 | use crate::{ |
23 | pathutil::{is_file_name, strip_prefix}, |
24 | Error, Match, PartialErrorBuilder, |
25 | }; |
26 | |
/// One pattern taken from a gitignore file.
///
/// Matchers hand back a reference to a `Glob` to report which pattern, out of
/// everything in one or more gitignore files, had the highest precedence for
/// a given path.
#[derive(Clone, Debug)]
pub struct Glob {
    /// Path of the gitignore file this pattern was read from, if there was
    /// one.
    from: Option<PathBuf>,
    /// The pattern text as it appeared in the gitignore file.
    original: String,
    /// The rewritten pattern that is actually compiled into a matcher.
    actual: String,
    /// `true` when the pattern is a whitelist (negated) pattern.
    is_whitelist: bool,
    /// `true` when the pattern may only match directories.
    is_only_dir: bool,
}
44 | |
45 | impl Glob { |
46 | /// Returns the file path that defined this glob. |
47 | pub fn from(&self) -> Option<&Path> { |
48 | self.from.as_ref().map(|p| &**p) |
49 | } |
50 | |
51 | /// The original glob as it was defined in a gitignore file. |
52 | pub fn original(&self) -> &str { |
53 | &self.original |
54 | } |
55 | |
56 | /// The actual glob that was compiled to respect gitignore |
57 | /// semantics. |
58 | pub fn actual(&self) -> &str { |
59 | &self.actual |
60 | } |
61 | |
62 | /// Whether this was a whitelisted glob or not. |
63 | pub fn is_whitelist(&self) -> bool { |
64 | self.is_whitelist |
65 | } |
66 | |
67 | /// Whether this glob must match a directory or not. |
68 | pub fn is_only_dir(&self) -> bool { |
69 | self.is_only_dir |
70 | } |
71 | |
72 | /// Returns true if and only if this glob has a `**/` prefix. |
73 | fn has_doublestar_prefix(&self) -> bool { |
74 | self.actual.starts_with("**/" ) || self.actual == "**" |
75 | } |
76 | } |
77 | |
78 | /// Gitignore is a matcher for the globs in one or more gitignore files |
79 | /// in the same directory. |
80 | #[derive (Clone, Debug)] |
81 | pub struct Gitignore { |
82 | set: GlobSet, |
83 | root: PathBuf, |
84 | globs: Vec<Glob>, |
85 | num_ignores: u64, |
86 | num_whitelists: u64, |
87 | matches: Option<Arc<Pool<Vec<usize>>>>, |
88 | } |
89 | |
90 | impl Gitignore { |
91 | /// Creates a new gitignore matcher from the gitignore file path given. |
92 | /// |
93 | /// If it's desirable to include multiple gitignore files in a single |
94 | /// matcher, or read gitignore globs from a different source, then |
95 | /// use `GitignoreBuilder`. |
96 | /// |
97 | /// This always returns a valid matcher, even if it's empty. In particular, |
98 | /// a Gitignore file can be partially valid, e.g., when one glob is invalid |
99 | /// but the rest aren't. |
100 | /// |
101 | /// Note that I/O errors are ignored. For more granular control over |
102 | /// errors, use `GitignoreBuilder`. |
103 | pub fn new<P: AsRef<Path>>( |
104 | gitignore_path: P, |
105 | ) -> (Gitignore, Option<Error>) { |
106 | let path = gitignore_path.as_ref(); |
107 | let parent = path.parent().unwrap_or(Path::new("/" )); |
108 | let mut builder = GitignoreBuilder::new(parent); |
109 | let mut errs = PartialErrorBuilder::default(); |
110 | errs.maybe_push_ignore_io(builder.add(path)); |
111 | match builder.build() { |
112 | Ok(gi) => (gi, errs.into_error_option()), |
113 | Err(err) => { |
114 | errs.push(err); |
115 | (Gitignore::empty(), errs.into_error_option()) |
116 | } |
117 | } |
118 | } |
119 | |
120 | /// Creates a new gitignore matcher from the global ignore file, if one |
121 | /// exists. |
122 | /// |
123 | /// The global config file path is specified by git's `core.excludesFile` |
124 | /// config option. |
125 | /// |
126 | /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` |
127 | /// does not exist or does not specify `core.excludesFile`, then |
128 | /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not |
129 | /// set or is empty, then `$HOME/.config/git/ignore` is used instead. |
130 | pub fn global() -> (Gitignore, Option<Error>) { |
131 | GitignoreBuilder::new("" ).build_global() |
132 | } |
133 | |
134 | /// Creates a new empty gitignore matcher that never matches anything. |
135 | /// |
136 | /// Its path is empty. |
137 | pub fn empty() -> Gitignore { |
138 | Gitignore { |
139 | set: GlobSet::empty(), |
140 | root: PathBuf::from("" ), |
141 | globs: vec![], |
142 | num_ignores: 0, |
143 | num_whitelists: 0, |
144 | matches: None, |
145 | } |
146 | } |
147 | |
148 | /// Returns the directory containing this gitignore matcher. |
149 | /// |
150 | /// All matches are done relative to this path. |
151 | pub fn path(&self) -> &Path { |
152 | &*self.root |
153 | } |
154 | |
155 | /// Returns true if and only if this gitignore has zero globs, and |
156 | /// therefore never matches any file path. |
157 | pub fn is_empty(&self) -> bool { |
158 | self.set.is_empty() |
159 | } |
160 | |
161 | /// Returns the total number of globs, which should be equivalent to |
162 | /// `num_ignores + num_whitelists`. |
163 | pub fn len(&self) -> usize { |
164 | self.set.len() |
165 | } |
166 | |
167 | /// Returns the total number of ignore globs. |
168 | pub fn num_ignores(&self) -> u64 { |
169 | self.num_ignores |
170 | } |
171 | |
172 | /// Returns the total number of whitelisted globs. |
173 | pub fn num_whitelists(&self) -> u64 { |
174 | self.num_whitelists |
175 | } |
176 | |
177 | /// Returns whether the given path (file or directory) matched a pattern in |
178 | /// this gitignore matcher. |
179 | /// |
180 | /// `is_dir` should be true if the path refers to a directory and false |
181 | /// otherwise. |
182 | /// |
183 | /// The given path is matched relative to the path given when building |
184 | /// the matcher. Specifically, before matching `path`, its prefix (as |
185 | /// determined by a common suffix of the directory containing this |
186 | /// gitignore) is stripped. If there is no common suffix/prefix overlap, |
187 | /// then `path` is assumed to be relative to this matcher. |
188 | pub fn matched<P: AsRef<Path>>( |
189 | &self, |
190 | path: P, |
191 | is_dir: bool, |
192 | ) -> Match<&Glob> { |
193 | if self.is_empty() { |
194 | return Match::None; |
195 | } |
196 | self.matched_stripped(self.strip(path.as_ref()), is_dir) |
197 | } |
198 | |
199 | /// Returns whether the given path (file or directory, and expected to be |
200 | /// under the root) or any of its parent directories (up to the root) |
201 | /// matched a pattern in this gitignore matcher. |
202 | /// |
203 | /// NOTE: This method is more expensive than walking the directory hierarchy |
204 | /// top-to-bottom and matching the entries. But, is easier to use in cases |
205 | /// when a list of paths are available without a hierarchy. |
206 | /// |
207 | /// `is_dir` should be true if the path refers to a directory and false |
208 | /// otherwise. |
209 | /// |
210 | /// The given path is matched relative to the path given when building |
211 | /// the matcher. Specifically, before matching `path`, its prefix (as |
212 | /// determined by a common suffix of the directory containing this |
213 | /// gitignore) is stripped. If there is no common suffix/prefix overlap, |
214 | /// then `path` is assumed to be relative to this matcher. |
215 | /// |
216 | /// # Panics |
217 | /// |
218 | /// This method panics if the given file path is not under the root path |
219 | /// of this matcher. |
220 | pub fn matched_path_or_any_parents<P: AsRef<Path>>( |
221 | &self, |
222 | path: P, |
223 | is_dir: bool, |
224 | ) -> Match<&Glob> { |
225 | if self.is_empty() { |
226 | return Match::None; |
227 | } |
228 | let mut path = self.strip(path.as_ref()); |
229 | assert!(!path.has_root(), "path is expected to be under the root" ); |
230 | |
231 | match self.matched_stripped(path, is_dir) { |
232 | Match::None => (), // walk up |
233 | a_match => return a_match, |
234 | } |
235 | while let Some(parent) = path.parent() { |
236 | match self.matched_stripped(parent, /* is_dir */ true) { |
237 | Match::None => path = parent, // walk up |
238 | a_match => return a_match, |
239 | } |
240 | } |
241 | Match::None |
242 | } |
243 | |
244 | /// Like matched, but takes a path that has already been stripped. |
245 | fn matched_stripped<P: AsRef<Path>>( |
246 | &self, |
247 | path: P, |
248 | is_dir: bool, |
249 | ) -> Match<&Glob> { |
250 | if self.is_empty() { |
251 | return Match::None; |
252 | } |
253 | let path = path.as_ref(); |
254 | let mut matches = self.matches.as_ref().unwrap().get(); |
255 | let candidate = Candidate::new(path); |
256 | self.set.matches_candidate_into(&candidate, &mut *matches); |
257 | for &i in matches.iter().rev() { |
258 | let glob = &self.globs[i]; |
259 | if !glob.is_only_dir() || is_dir { |
260 | return if glob.is_whitelist() { |
261 | Match::Whitelist(glob) |
262 | } else { |
263 | Match::Ignore(glob) |
264 | }; |
265 | } |
266 | } |
267 | Match::None |
268 | } |
269 | |
270 | /// Strips the given path such that it's suitable for matching with this |
271 | /// gitignore matcher. |
272 | fn strip<'a, P: 'a + AsRef<Path> + ?Sized>( |
273 | &'a self, |
274 | path: &'a P, |
275 | ) -> &'a Path { |
276 | let mut path = path.as_ref(); |
277 | // A leading ./ is completely superfluous. We also strip it from |
278 | // our gitignore root path, so we need to strip it from our candidate |
279 | // path too. |
280 | if let Some(p) = strip_prefix("./" , path) { |
281 | path = p; |
282 | } |
283 | // Strip any common prefix between the candidate path and the root |
284 | // of the gitignore, to make sure we get relative matching right. |
285 | // BUT, a file name might not have any directory components to it, |
286 | // in which case, we don't want to accidentally strip any part of the |
287 | // file name. |
288 | // |
289 | // As an additional special case, if the root is just `.`, then we |
290 | // shouldn't try to strip anything, e.g., when path begins with a `.`. |
291 | if self.root != Path::new("." ) && !is_file_name(path) { |
292 | if let Some(p) = strip_prefix(&self.root, path) { |
293 | path = p; |
294 | // If we're left with a leading slash, get rid of it. |
295 | if let Some(p) = strip_prefix("/" , path) { |
296 | path = p; |
297 | } |
298 | } |
299 | } |
300 | path |
301 | } |
302 | } |
303 | |
304 | /// Builds a matcher for a single set of globs from a .gitignore file. |
305 | #[derive (Clone, Debug)] |
306 | pub struct GitignoreBuilder { |
307 | builder: GlobSetBuilder, |
308 | root: PathBuf, |
309 | globs: Vec<Glob>, |
310 | case_insensitive: bool, |
311 | } |
312 | |
313 | impl GitignoreBuilder { |
314 | /// Create a new builder for a gitignore file. |
315 | /// |
316 | /// The path given should be the path at which the globs for this gitignore |
317 | /// file should be matched. Note that paths are always matched relative |
318 | /// to the root path given here. Generally, the root path should correspond |
319 | /// to the *directory* containing a `.gitignore` file. |
320 | pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder { |
321 | let root = root.as_ref(); |
322 | GitignoreBuilder { |
323 | builder: GlobSetBuilder::new(), |
324 | root: strip_prefix("./" , root).unwrap_or(root).to_path_buf(), |
325 | globs: vec![], |
326 | case_insensitive: false, |
327 | } |
328 | } |
329 | |
330 | /// Builds a new matcher from the globs added so far. |
331 | /// |
332 | /// Once a matcher is built, no new globs can be added to it. |
333 | pub fn build(&self) -> Result<Gitignore, Error> { |
334 | let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count(); |
335 | let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count(); |
336 | let set = self |
337 | .builder |
338 | .build() |
339 | .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?; |
340 | Ok(Gitignore { |
341 | set, |
342 | root: self.root.clone(), |
343 | globs: self.globs.clone(), |
344 | num_ignores: nignore as u64, |
345 | num_whitelists: nwhite as u64, |
346 | matches: Some(Arc::new(Pool::new(|| vec![]))), |
347 | }) |
348 | } |
349 | |
350 | /// Build a global gitignore matcher using the configuration in this |
351 | /// builder. |
352 | /// |
353 | /// This consumes ownership of the builder unlike `build` because it |
354 | /// must mutate the builder to add the global gitignore globs. |
355 | /// |
356 | /// Note that this ignores the path given to this builder's constructor |
357 | /// and instead derives the path automatically from git's global |
358 | /// configuration. |
359 | pub fn build_global(mut self) -> (Gitignore, Option<Error>) { |
360 | match gitconfig_excludes_path() { |
361 | None => (Gitignore::empty(), None), |
362 | Some(path) => { |
363 | if !path.is_file() { |
364 | (Gitignore::empty(), None) |
365 | } else { |
366 | let mut errs = PartialErrorBuilder::default(); |
367 | errs.maybe_push_ignore_io(self.add(path)); |
368 | match self.build() { |
369 | Ok(gi) => (gi, errs.into_error_option()), |
370 | Err(err) => { |
371 | errs.push(err); |
372 | (Gitignore::empty(), errs.into_error_option()) |
373 | } |
374 | } |
375 | } |
376 | } |
377 | } |
378 | } |
379 | |
380 | /// Add each glob from the file path given. |
381 | /// |
382 | /// The file given should be formatted as a `gitignore` file. |
383 | /// |
384 | /// Note that partial errors can be returned. For example, if there was |
385 | /// a problem adding one glob, an error for that will be returned, but |
386 | /// all other valid globs will still be added. |
387 | pub fn add<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> { |
388 | let path = path.as_ref(); |
389 | let file = match File::open(path) { |
390 | Err(err) => return Some(Error::Io(err).with_path(path)), |
391 | Ok(file) => file, |
392 | }; |
393 | log::debug!("opened gitignore file: {}" , path.display()); |
394 | let rdr = BufReader::new(file); |
395 | let mut errs = PartialErrorBuilder::default(); |
396 | for (i, line) in rdr.lines().enumerate() { |
397 | let lineno = (i + 1) as u64; |
398 | let line = match line { |
399 | Ok(line) => line, |
400 | Err(err) => { |
401 | errs.push(Error::Io(err).tagged(path, lineno)); |
402 | break; |
403 | } |
404 | }; |
405 | if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) { |
406 | errs.push(err.tagged(path, lineno)); |
407 | } |
408 | } |
409 | errs.into_error_option() |
410 | } |
411 | |
412 | /// Add each glob line from the string given. |
413 | /// |
414 | /// If this string came from a particular `gitignore` file, then its path |
415 | /// should be provided here. |
416 | /// |
417 | /// The string given should be formatted as a `gitignore` file. |
418 | #[cfg (test)] |
419 | fn add_str( |
420 | &mut self, |
421 | from: Option<PathBuf>, |
422 | gitignore: &str, |
423 | ) -> Result<&mut GitignoreBuilder, Error> { |
424 | for line in gitignore.lines() { |
425 | self.add_line(from.clone(), line)?; |
426 | } |
427 | Ok(self) |
428 | } |
429 | |
430 | /// Add a line from a gitignore file to this builder. |
431 | /// |
432 | /// If this line came from a particular `gitignore` file, then its path |
433 | /// should be provided here. |
434 | /// |
435 | /// If the line could not be parsed as a glob, then an error is returned. |
436 | pub fn add_line( |
437 | &mut self, |
438 | from: Option<PathBuf>, |
439 | mut line: &str, |
440 | ) -> Result<&mut GitignoreBuilder, Error> { |
441 | #![allow (deprecated)] |
442 | |
443 | if line.starts_with("#" ) { |
444 | return Ok(self); |
445 | } |
446 | if !line.ends_with(" \\ " ) { |
447 | line = line.trim_right(); |
448 | } |
449 | if line.is_empty() { |
450 | return Ok(self); |
451 | } |
452 | let mut glob = Glob { |
453 | from, |
454 | original: line.to_string(), |
455 | actual: String::new(), |
456 | is_whitelist: false, |
457 | is_only_dir: false, |
458 | }; |
459 | let mut is_absolute = false; |
460 | if line.starts_with(" \\!" ) || line.starts_with(" \\#" ) { |
461 | line = &line[1..]; |
462 | is_absolute = line.chars().nth(0) == Some('/' ); |
463 | } else { |
464 | if line.starts_with("!" ) { |
465 | glob.is_whitelist = true; |
466 | line = &line[1..]; |
467 | } |
468 | if line.starts_with("/" ) { |
469 | // `man gitignore` says that if a glob starts with a slash, |
470 | // then the glob can only match the beginning of a path |
471 | // (relative to the location of gitignore). We achieve this by |
472 | // simply banning wildcards from matching /. |
473 | line = &line[1..]; |
474 | is_absolute = true; |
475 | } |
476 | } |
477 | // If it ends with a slash, then this should only match directories, |
478 | // but the slash should otherwise not be used while globbing. |
479 | if line.as_bytes().last() == Some(&b'/' ) { |
480 | glob.is_only_dir = true; |
481 | line = &line[..line.len() - 1]; |
482 | // If the slash was escaped, then remove the escape. |
483 | // See: https://github.com/BurntSushi/ripgrep/issues/2236 |
484 | if line.as_bytes().last() == Some(&b' \\' ) { |
485 | line = &line[..line.len() - 1]; |
486 | } |
487 | } |
488 | glob.actual = line.to_string(); |
489 | // If there is a literal slash, then this is a glob that must match the |
490 | // entire path name. Otherwise, we should let it match anywhere, so use |
491 | // a **/ prefix. |
492 | if !is_absolute && !line.chars().any(|c| c == '/' ) { |
493 | // ... but only if we don't already have a **/ prefix. |
494 | if !glob.has_doublestar_prefix() { |
495 | glob.actual = format!("**/ {}" , glob.actual); |
496 | } |
497 | } |
498 | // If the glob ends with `/**`, then we should only match everything |
499 | // inside a directory, but not the directory itself. Standard globs |
500 | // will match the directory. So we add `/*` to force the issue. |
501 | if glob.actual.ends_with("/**" ) { |
502 | glob.actual = format!(" {}/*" , glob.actual); |
503 | } |
504 | let parsed = GlobBuilder::new(&glob.actual) |
505 | .literal_separator(true) |
506 | .case_insensitive(self.case_insensitive) |
507 | .backslash_escape(true) |
508 | .build() |
509 | .map_err(|err| Error::Glob { |
510 | glob: Some(glob.original.clone()), |
511 | err: err.kind().to_string(), |
512 | })?; |
513 | self.builder.add(parsed); |
514 | self.globs.push(glob); |
515 | Ok(self) |
516 | } |
517 | |
518 | /// Toggle whether the globs should be matched case insensitively or not. |
519 | /// |
520 | /// When this option is changed, only globs added after the change will be |
521 | /// affected. |
522 | /// |
523 | /// This is disabled by default. |
524 | pub fn case_insensitive( |
525 | &mut self, |
526 | yes: bool, |
527 | ) -> Result<&mut GitignoreBuilder, Error> { |
528 | // TODO: This should not return a `Result`. Fix this in the next semver |
529 | // release. |
530 | self.case_insensitive = yes; |
531 | Ok(self) |
532 | } |
533 | } |
534 | |
535 | /// Return the file path of the current environment's global gitignore file. |
536 | /// |
537 | /// Note that the file path returned may not exist. |
538 | pub fn gitconfig_excludes_path() -> Option<PathBuf> { |
539 | // git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably, |
540 | // both can be active at the same time, where $HOME/.gitconfig takes |
541 | // precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then |
542 | // we're done. |
543 | match gitconfig_home_contents().and_then(|x: Vec| parse_excludes_file(&x)) { |
544 | Some(path: PathBuf) => return Some(path), |
545 | None => {} |
546 | } |
547 | match gitconfig_xdg_contents().and_then(|x: Vec| parse_excludes_file(&x)) { |
548 | Some(path: PathBuf) => return Some(path), |
549 | None => {} |
550 | } |
551 | excludes_file_default() |
552 | } |
553 | |
554 | /// Returns the file contents of git's global config file, if one exists, in |
555 | /// the user's home directory. |
556 | fn gitconfig_home_contents() -> Option<Vec<u8>> { |
557 | let home: PathBuf = match home_dir() { |
558 | None => return None, |
559 | Some(home: PathBuf) => home, |
560 | }; |
561 | let mut file: BufReader = match File::open(path:home.join(path:".gitconfig" )) { |
562 | Err(_) => return None, |
563 | Ok(file: File) => BufReader::new(inner:file), |
564 | }; |
565 | let mut contents: Vec = vec![]; |
566 | file.read_to_end(&mut contents).ok().map(|_| contents) |
567 | } |
568 | |
569 | /// Returns the file contents of git's global config file, if one exists, in |
570 | /// the user's XDG_CONFIG_HOME directory. |
571 | fn gitconfig_xdg_contents() -> Option<Vec<u8>> { |
572 | let path: Option = stdOption::env::var_os(key:"XDG_CONFIG_HOME" ) |
573 | .and_then(|x: OsString| if x.is_empty() { None } else { Some(PathBuf::from(x)) }) |
574 | .or_else(|| home_dir().map(|p: PathBuf| p.join(path:".config" ))) |
575 | .map(|x: PathBuf| x.join(path:"git/config" )); |
576 | let mut file: BufReader = match path.and_then(|p: PathBuf| File::open(path:p).ok()) { |
577 | None => return None, |
578 | Some(file: File) => BufReader::new(inner:file), |
579 | }; |
580 | let mut contents: Vec = vec![]; |
581 | file.read_to_end(&mut contents).ok().map(|_| contents) |
582 | } |
583 | |
584 | /// Returns the default file path for a global .gitignore file. |
585 | /// |
586 | /// Specifically, this respects XDG_CONFIG_HOME. |
587 | fn excludes_file_default() -> Option<PathBuf> { |
588 | stdOption::env::var_os(key:"XDG_CONFIG_HOME" ) |
589 | .and_then(|x: OsString| if x.is_empty() { None } else { Some(PathBuf::from(x)) }) |
590 | .or_else(|| home_dir().map(|p: PathBuf| p.join(path:".config" ))) |
591 | .map(|x: PathBuf| x.join(path:"git/ignore" )) |
592 | } |
593 | |
594 | /// Extract git's `core.excludesfile` config setting from the raw file contents |
595 | /// given. |
596 | fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> { |
597 | use std::sync::OnceLock; |
598 | |
599 | use regex_automata::{meta::Regex, util::syntax}; |
600 | |
601 | // N.B. This is the lazy approach, and isn't technically correct, but |
602 | // probably works in more circumstances. I guess we would ideally have |
603 | // a full INI parser. Yuck. |
604 | static RE: OnceLock<Regex> = OnceLock::new(); |
605 | let re: &Regex = RE.get_or_init(|| { |
606 | RegexResult::builder() |
607 | .configure(Regex::config().utf8_empty(false)) |
608 | .syntax(syntax::Config::new().utf8(false)) |
609 | .build(pattern:r#"(?im-u)^\s*excludesfile\s*=\s*"?\s*(\S+?)\s*"?\s*$"# ) |
610 | .unwrap() |
611 | }); |
612 | // We don't care about amortizing allocs here I think. This should only |
613 | // be called ~once per traversal or so? (Although it's not guaranteed...) |
614 | let mut caps: Captures = re.create_captures(); |
615 | re.captures(input:data, &mut caps); |
616 | let span: Span = caps.get_group(index:1)?; |
617 | let candidate: &[u8] = &data[span]; |
618 | std::str::from_utf8(candidate).ok().map(|s: &str| PathBuf::from(expand_tilde(path:s))) |
619 | } |
620 | |
621 | /// Expands ~ in file paths to the value of $HOME. |
622 | fn expand_tilde(path: &str) -> String { |
623 | let home: String = match home_dir() { |
624 | None => return path.to_string(), |
625 | Some(home: PathBuf) => home.to_string_lossy().into_owned(), |
626 | }; |
627 | path.replace(from:"~" , &home) |
628 | } |
629 | |
/// Looks up the current user's home directory, if it can be determined.
// `std::env::home_dir` is good enough here for now: its known bugs are, IMO,
// pretty minor corner cases, hence the lint override.
#[allow(deprecated)]
fn home_dir() -> Option<PathBuf> {
    std::env::home_dir()
}
637 | |
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::{Gitignore, GitignoreBuilder};

    /// Builds a matcher rooted at `root` from gitignore-formatted text,
    /// panicking if any line fails to parse.
    fn gi_from_str<P: AsRef<Path>>(root: P, s: &str) -> Gitignore {
        let mut builder = GitignoreBuilder::new(root);
        builder.add_str(None, s).unwrap();
        builder.build().unwrap()
    }

    /// Defines a test asserting that `$path` IS ignored by the gitignore
    /// text `$gi` rooted at `$root`. `$is_dir` defaults to `false`.
    macro_rules! ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                let gi = gi_from_str($root, $gi);
                assert!(gi.matched($path, $is_dir).is_ignore());
            }
        };
    }

    /// Defines a test asserting that `$path` is NOT ignored by the gitignore
    /// text `$gi` rooted at `$root`. `$is_dir` defaults to `false`.
    macro_rules! not_ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            not_ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                let gi = gi_from_str($root, $gi);
                assert!(!gi.matched($path, $is_dir).is_ignore());
            }
        };
    }

    const ROOT: &str = "/home/foobar/rust/rg";

    ignored!(ig1, ROOT, "months", "months");
    ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
    ignored!(ig3, ROOT, "*.rs", "src/main.rs");
    ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
    ignored!(ig5, ROOT, "/*.c", "cat-file.c");
    ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
    ignored!(ig7, ROOT, "!src/main.rs \n*.rs", "src/main.rs");
    ignored!(ig8, ROOT, "foo/", "foo", true);
    ignored!(ig9, ROOT, "**/foo", "foo");
    ignored!(ig10, ROOT, "**/foo", "src/foo");
    ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
    ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
    ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
    ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
    ignored!(ig15, ROOT, "abc/**", "abc/x");
    ignored!(ig16, ROOT, "abc/**", "abc/x/y");
    ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
    ignored!(ig18, ROOT, "a/**/b", "a/b");
    ignored!(ig19, ROOT, "a/**/b", "a/x/b");
    ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
    ignored!(ig21, ROOT, r"\!xy", "!xy");
    ignored!(ig22, ROOT, r"\#foo", "#foo");
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");
    ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
    ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
    ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
    ignored!(ig28, "./src", "/llvm/", "./src/llvm", true);
    // The trailing space is deliberate: unescaped trailing whitespace must
    // be trimmed from a pattern.
    ignored!(ig29, ROOT, "node_modules/ ", "node_modules", true);
    ignored!(ig30, ROOT, "**/", "foo/bar", true);
    ignored!(ig31, ROOT, "path1/*", "path1/foo");
    ignored!(ig32, ROOT, ".a/b", ".a/b");
    ignored!(ig33, "./", ".a/b", ".a/b");
    ignored!(ig34, ".", ".a/b", ".a/b");
    ignored!(ig35, "./.", ".a/b", ".a/b");
    ignored!(ig36, "././", ".a/b", ".a/b");
    ignored!(ig37, "././.", ".a/b", ".a/b");
    // A backslash makes the following character literal. The pattern must
    // start with the backslash itself for these paths to match.
    ignored!(ig38, ROOT, "\\[", "[");
    ignored!(ig39, ROOT, "\\?", "?");
    ignored!(ig40, ROOT, "\\*", "*");
    ignored!(ig41, ROOT, "\\a", "a");
    ignored!(ig42, ROOT, "s*.rs", "sfoo.rs");
    ignored!(ig43, ROOT, "**", "foo.rs");
    ignored!(ig44, ROOT, "**/**/*", "a/foo.rs");

    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
    not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
    not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot6, ROOT, "*.rs \n!src/main.rs", "src/main.rs");
    not_ignored!(ignot7, ROOT, "foo/", "foo", false);
    not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
    not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
    not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
    not_ignored!(ignot11, ROOT, "#foo", "#foo");
    not_ignored!(ignot12, ROOT, " \n\n\n", "foo");
    not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
    not_ignored!(
        ignot14,
        "./third_party/protobuf",
        "m4/ltoptions.m4",
        "./third_party/protobuf/csharp/src/packages/repositories.config"
    );
    not_ignored!(ignot15, ROOT, "!/bar", "foo/bar");
    not_ignored!(ignot16, ROOT, "* \n!**/", "foo", true);
    not_ignored!(ignot17, ROOT, "src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot18, ROOT, "path1/*", "path2/path1/foo");
    not_ignored!(ignot19, ROOT, "s*.rs", "src/foo.rs");

    /// Copies a string's UTF-8 bytes into an owned buffer.
    fn bytes(s: &str) -> Vec<u8> {
        s.to_string().into_bytes()
    }

    /// Converts a path to an owned string, panicking on invalid UTF-8.
    fn path_string<P: AsRef<Path>>(path: P) -> String {
        path.as_ref().to_str().unwrap().to_string()
    }

    #[test]
    fn parse_excludes_file1() {
        let data = bytes("[core] \nexcludesFile = /foo/bar");
        let got = super::parse_excludes_file(&data).unwrap();
        assert_eq!(path_string(got), "/foo/bar");
    }

    #[test]
    fn parse_excludes_file2() {
        let data = bytes("[core] \nexcludesFile = ~/foo/bar");
        let got = super::parse_excludes_file(&data).unwrap();
        assert_eq!(path_string(got), super::expand_tilde("~/foo/bar"));
    }

    #[test]
    fn parse_excludes_file3() {
        // Misspelled key (`excludeFile`): must not be picked up.
        let data = bytes("[core] \nexcludeFile = /foo/bar");
        assert!(super::parse_excludes_file(&data).is_none());
    }

    #[test]
    fn parse_excludes_file4() {
        let data = bytes("[core] \nexcludesFile = \"~/foo/bar \"");
        let got = super::parse_excludes_file(&data);
        assert_eq!(
            path_string(got.unwrap()),
            super::expand_tilde("~/foo/bar")
        );
    }

    #[test]
    fn parse_excludes_file5() {
        // Doubled quotes are not a valid value.
        let data = bytes("[core] \nexcludesFile = \" \"~/foo/bar \" \"");
        assert!(super::parse_excludes_file(&data).is_none());
    }

    // See: https://github.com/BurntSushi/ripgrep/issues/106
    #[test]
    fn regression_106() {
        gi_from_str("/", " ");
    }

    #[test]
    fn case_insensitive() {
        let gi = GitignoreBuilder::new(ROOT)
            .case_insensitive(true)
            .unwrap()
            .add_str(None, "*.html")
            .unwrap()
            .build()
            .unwrap();
        assert!(gi.matched("foo.html", false).is_ignore());
        assert!(gi.matched("foo.HTML", false).is_ignore());
        assert!(!gi.matched("foo.htm", false).is_ignore());
        assert!(!gi.matched("foo.HTM", false).is_ignore());
    }

    // The same patterns with the default (case sensitive) setting.
    ignored!(cs1, ROOT, "*.html", "foo.html");
    not_ignored!(cs2, ROOT, "*.html", "foo.HTML");
    not_ignored!(cs3, ROOT, "*.html", "foo.htm");
    not_ignored!(cs4, ROOT, "*.html", "foo.HTM");
}
818 | |