1 | //! Gets metadata about a workspace from Cargo |
2 | |
3 | use std::collections::BTreeMap; |
4 | use std::ffi::OsStr; |
5 | use std::path::{Path, PathBuf}; |
6 | |
7 | /// Describes how this module can fail |
8 | #[derive (Debug, thiserror::Error)] |
9 | pub enum Error { |
10 | #[error("I/O Error: {0:?}" )] |
11 | Io(#[from] std::io::Error), |
12 | #[error("Failed get output from cargo-metadata: {0:?}" )] |
13 | GettingMetadata(#[from] cargo_metadata::Error), |
14 | #[error("Bad path {0:?} whilst scraping files" )] |
15 | Scraping(PathBuf), |
16 | } |
17 | |
/// Identifies one package on crates.io by its name and version.
///
/// The derived ordering compares `name` first and then `version`; both are
/// compared as plain strings, so versions are *not* ordered semantically.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Package {
    /// Package name, e.g. `serde`.
    pub name: String,
    /// Version number, stored in its textual form.
    pub version: String,
}
26 | |
/// Licensing-related information collected for a single package.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PackageMetadata {
    /// The license the package is distributed under.
    pub license: String,
    /// The authors listed in the package metadata.
    pub authors: Vec<String>,
    /// Important files shipped with the package, keyed by file name and
    /// holding the file contents.
    ///
    /// This covers *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, matched
    /// case-insensitively.
    pub notices: BTreeMap<String, String>,
    /// `Some(true)` when this dependency is part of the Rust Standard Library.
    ///
    /// `None` means the question has not been answered yet.
    pub is_in_libstd: Option<bool>,
}
41 | |
42 | /// Use `cargo metadata` to get a list of dependencies and their license data. License files will |
43 | /// also be pulled from the vendor path (generated by bootstrap). |
44 | /// |
45 | /// Any dependency with a path beginning with `root_path` is ignored, as we assume `reuse` has |
46 | /// covered it already. |
47 | pub fn get_metadata_and_notices( |
48 | cargo: &Path, |
49 | vendor_path: &Path, |
50 | root_path: &Path, |
51 | manifest_paths: &[PathBuf], |
52 | ) -> Result<BTreeMap<Package, PackageMetadata>, Error> { |
53 | let mut output: BTreeMap = get_metadata(cargo, root_path, manifest_paths)?; |
54 | |
55 | // Now for each dependency we found, go and grab any important looking files |
56 | for (package: &Package, metadata: &mut PackageMetadata) in output.iter_mut() { |
57 | load_important_files(package, dep:metadata, &vendor_path)?; |
58 | } |
59 | |
60 | Ok(output) |
61 | } |
62 | |
63 | /// Use `cargo metadata` to get a list of dependencies and their license data. |
64 | /// |
65 | /// Any dependency with a path beginning with `root_path` is ignored, as we |
66 | /// assume `reuse` has covered it already. |
67 | pub fn get_metadata( |
68 | cargo: &Path, |
69 | root_path: &Path, |
70 | manifest_paths: &[PathBuf], |
71 | ) -> Result<BTreeMap<Package, PackageMetadata>, Error> { |
72 | let mut output = BTreeMap::new(); |
73 | // Look at the metadata for each manifest |
74 | for manifest_path in manifest_paths { |
75 | if manifest_path.file_name() != Some(OsStr::new("Cargo.toml" )) { |
76 | panic!("cargo_manifest::get requires a path to a Cargo.toml file" ); |
77 | } |
78 | let metadata = cargo_metadata::MetadataCommand::new() |
79 | .cargo_path(cargo) |
80 | .env("RUSTC_BOOTSTRAP" , "1" ) |
81 | .manifest_path(manifest_path) |
82 | .exec()?; |
83 | for package in metadata.packages { |
84 | let manifest_path = package.manifest_path.as_path(); |
85 | if manifest_path.starts_with(root_path) { |
86 | // it's an in-tree dependency and reuse covers it |
87 | continue; |
88 | } |
89 | // otherwise it's an out-of-tree dependency |
90 | let package_id = Package { name: package.name, version: package.version.to_string() }; |
91 | output.insert( |
92 | package_id, |
93 | PackageMetadata { |
94 | license: package.license.unwrap_or_else(|| String::from("Unspecified" )), |
95 | authors: package.authors, |
96 | notices: BTreeMap::new(), |
97 | is_in_libstd: None, |
98 | }, |
99 | ); |
100 | } |
101 | } |
102 | |
103 | Ok(output) |
104 | } |
105 | |
106 | /// Add important files off disk into this dependency. |
107 | /// |
108 | /// Maybe one-day Cargo.toml will contain enough information that we don't need |
109 | /// to do this manual scraping. |
110 | fn load_important_files( |
111 | package: &Package, |
112 | dep: &mut PackageMetadata, |
113 | vendor_root: &Path, |
114 | ) -> Result<(), Error> { |
115 | let name_version = format!(" {}- {}" , package.name, package.version); |
116 | println!("Scraping notices for {}..." , name_version); |
117 | let dep_vendor_path = vendor_root.join(name_version); |
118 | for entry in std::fs::read_dir(dep_vendor_path)? { |
119 | let entry = entry?; |
120 | let metadata = entry.metadata()?; |
121 | let path = entry.path(); |
122 | let Some(filename) = path.file_name() else { |
123 | return Err(Error::Scraping(path)); |
124 | }; |
125 | let lc_filename = filename.to_ascii_lowercase(); |
126 | let lc_filename_str = lc_filename.to_string_lossy(); |
127 | let mut keep = false; |
128 | for m in ["copyright" , "licence" , "license" , "author" , "notice" ] { |
129 | if lc_filename_str.contains(m) { |
130 | keep = true; |
131 | break; |
132 | } |
133 | } |
134 | if keep { |
135 | if metadata.is_dir() { |
136 | for inner_entry in std::fs::read_dir(entry.path())? { |
137 | let inner_entry = inner_entry?; |
138 | if inner_entry.metadata()?.is_file() { |
139 | let inner_filename = inner_entry.file_name(); |
140 | let inner_filename_str = inner_filename.to_string_lossy(); |
141 | let qualified_filename = |
142 | format!(" {}/ {}" , lc_filename_str, inner_filename_str); |
143 | println!("Scraping {}" , qualified_filename); |
144 | dep.notices.insert( |
145 | qualified_filename.to_string(), |
146 | std::fs::read_to_string(inner_entry.path())?, |
147 | ); |
148 | } |
149 | } |
150 | } else if metadata.is_file() { |
151 | let filename = filename.to_string_lossy(); |
152 | println!("Scraping {}" , filename); |
153 | dep.notices.insert(filename.to_string(), std::fs::read_to_string(path)?); |
154 | } |
155 | } |
156 | } |
157 | Ok(()) |
158 | } |
159 | |