1// SPDX-FileCopyrightText: 2021 HH Partners
2//
3// SPDX-License-Identifier: MIT
4
5use std::collections::HashSet;
6
7use log::info;
8use serde::{Deserialize, Serialize};
9use uuid::Uuid;
10
11use super::{
12 Algorithm, Annotation, DocumentCreationInformation, FileInformation,
13 OtherLicensingInformationDetected, PackageInformation, Relationship, Snippet,
14};
15
16/// A representation of an [SPDX Document]
17///
18/// This is the main struct of this crate. The struct implements [`Serialize`] and [`Deserialize`]
19/// to allow it to be serialized into and deserialized from any data format supported by [Serde].
20///
21/// # SPDX specification version
22///
23/// The crate has been developed around SPDX version 2.2.1. Fields deprecated in 2.2.1, like
24/// [review information] are not supported. The plan is to support newer versions as they are
25/// released.
26///
27/// # Data formats
28///
29/// The crate has been developed for usage with JSON SPDX documents. The naming of the fields should
30/// conform to the spec for at least JSON. Other formats, like YAML may work, but no guarantees are
31/// made.
32///
33/// The crate also allows for deserializing the struct from SPDX documents in [tag-value format]
34/// with [`crate::parsers::spdx_from_tag_value`].
35///
36/// [SPDX Document]: https://spdx.github.io/spdx-spec/composition-of-an-SPDX-document/
37/// [Serde]: https://serde.rs
38/// [review information]: https://spdx.github.io/spdx-spec/review-information-deprecated/
39/// [tag-value format]: https://spdx.github.io/spdx-spec/conformance/
40#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
41#[serde(rename_all = "camelCase", deny_unknown_fields)]
42pub struct SPDX {
43 /// <https://spdx.github.io/spdx-spec/2-document-creation-information/>
44 #[serde(flatten)]
45 pub document_creation_information: DocumentCreationInformation,
46
47 /// <https://spdx.github.io/spdx-spec/3-package-information/>
48 #[serde(rename = "packages")]
49 #[serde(default)]
50 pub package_information: Vec<PackageInformation>,
51
52 /// <https://spdx.github.io/spdx-spec/6-other-licensing-information-detected/>
53 #[serde(rename = "hasExtractedLicensingInfos")]
54 #[serde(default)]
55 pub other_licensing_information_detected: Vec<OtherLicensingInformationDetected>,
56
57 /// <https://spdx.github.io/spdx-spec/4-file-information/>
58 #[serde(rename = "files")]
59 #[serde(default)]
60 pub file_information: Vec<FileInformation>,
61
62 /// <https://spdx.github.io/spdx-spec/5-snippet-information/>
63 #[serde(rename = "snippets")]
64 #[serde(default)]
65 pub snippet_information: Vec<Snippet>,
66
67 /// <https://spdx.github.io/spdx-spec/7-relationships-between-SPDX-elements/>
68 #[serde(default)]
69 pub relationships: Vec<Relationship>,
70
71 /// <https://spdx.github.io/spdx-spec/8-annotations/>
72 #[serde(default)]
73 pub annotations: Vec<Annotation>,
74
75 /// Counter for creating SPDXRefs. Is not part of the spec, so don't serialize.
76 #[serde(skip)]
77 pub spdx_ref_counter: i32,
78}
79
80impl SPDX {
81 /// Create new SPDX struct.
82 pub fn new(name: &str) -> Self {
83 info!("Creating SPDX.");
84
85 Self {
86 document_creation_information: DocumentCreationInformation {
87 document_name: name.to_string(),
88 spdx_document_namespace: format!(
89 "http://spdx.org/spdxdocs/{}-{}",
90 name,
91 Uuid::new_v4()
92 ),
93 ..DocumentCreationInformation::default()
94 },
95 package_information: Vec::new(),
96 other_licensing_information_detected: Vec::new(),
97 file_information: Vec::new(),
98 relationships: Vec::new(),
99 spdx_ref_counter: 0,
100 annotations: Vec::new(),
101 snippet_information: Vec::new(),
102 }
103 }
104
105 /// Get unique hashes for all files the SPDX.
106 pub fn get_unique_hashes(&self, algorithm: Algorithm) -> HashSet<String> {
107 info!("Getting unique hashes for files in SPDX.");
108
109 let mut unique_hashes: HashSet<String> = HashSet::new();
110
111 for file_information in &self.file_information {
112 if let Some(checksum) = file_information.checksum(algorithm) {
113 unique_hashes.insert(checksum.to_string());
114 }
115 }
116
117 unique_hashes
118 }
119
120 /// Find related files of the package with the provided id.
121 pub fn get_files_for_package(
122 &self,
123 package_spdx_id: &str,
124 ) -> Vec<(&FileInformation, &Relationship)> {
125 info!("Finding related files for package {}.", &package_spdx_id);
126
127 let relationships = self
128 .relationships
129 .iter()
130 .filter(|relationship| relationship.spdx_element_id == package_spdx_id);
131
132 let mut result: Vec<(&FileInformation, &Relationship)> = Vec::new();
133
134 for relationship in relationships {
135 let file = self
136 .file_information
137 .iter()
138 .find(|file| file.file_spdx_identifier == relationship.related_spdx_element);
139 if let Some(file) = file {
140 result.push((file, relationship));
141 };
142 }
143
144 result
145 }
146
147 /// Get all license identifiers from the SPDX.
148 ///
149 /// # Errors
150 ///
151 /// Returns [`SpdxError`] if parsing of the expressions fails.
152 pub fn get_license_ids(&self) -> HashSet<String> {
153 info!("Getting all license identifiers from SPDX.");
154
155 let mut license_ids = HashSet::new();
156
157 for file in &self.file_information {
158 if let Some(concluded_license) = &file.concluded_license {
159 for license in concluded_license.identifiers() {
160 if license != "NOASSERTION" && license != "NONE" {
161 license_ids.insert(license.clone());
162 }
163 }
164 }
165 }
166
167 license_ids
168 }
169
170 /// Get all relationships where the given SPDX ID is the SPDX element id.
171 pub fn relationships_for_spdx_id(&self, spdx_id: &str) -> Vec<&Relationship> {
172 self.relationships
173 .iter()
174 .filter(|relationship| relationship.spdx_element_id == spdx_id)
175 .collect()
176 }
177
178 /// Get all relationships where the given SPDX ID is the related SPDX element id.
179 pub fn relationships_for_related_spdx_id(&self, spdx_id: &str) -> Vec<&Relationship> {
180 self.relationships
181 .iter()
182 .filter(|relationship| relationship.related_spdx_element == spdx_id)
183 .collect()
184 }
185}
186
#[cfg(test)]
mod test {
    use std::{fs::read_to_string, iter::FromIterator};

    use spdx_expression::SpdxExpression;

    use crate::models::RelationshipType;

    use super::*;

    /// Path of the SPDX 2.2 JSON example document that every test in this module reads.
    const EXAMPLE_DOCUMENT: &str = "tests/data/SPDXJSONExample-v2.2.spdx.json";

    /// Reads and deserializes the example document, panicking if either step fails.
    fn load_example() -> SPDX {
        let contents = read_to_string(EXAMPLE_DOCUMENT).unwrap();
        serde_json::from_str(&contents).unwrap()
    }

    /// Builds a comment-less relationship from `element` to `related`.
    fn relationship(element: &str, related: &str, kind: RelationshipType) -> Relationship {
        Relationship {
            spdx_element_id: element.into(),
            related_spdx_element: related.into(),
            relationship_type: kind,
            comment: None,
        }
    }

    #[test]
    fn deserialize_simple_spdx() {
        let document = load_example();

        assert_eq!(
            document.document_creation_information.document_name,
            "SPDX-Tools-v2.0".to_string()
        );
    }

    #[test]
    fn find_related_files_for_package() {
        let document = load_example();

        let package_files = document.get_files_for_package("SPDXRef-Package");

        assert_eq!(package_files.len(), 1);

        // Each entry pairs a file with the relationship that links it to the package.
        let (file, file_relationship) = package_files
            .iter()
            .find(|(file, _)| file.file_name == *"./lib-source/jena-2.6.3-sources.jar")
            .expect("Should always be found");

        assert_eq!(file.file_spdx_identifier, "SPDXRef-JenaLib");
        assert_eq!(
            file_relationship.relationship_type,
            RelationshipType::Contains
        );

        assert_eq!(
            file.concluded_license,
            Some(SpdxExpression::parse("LicenseRef-1").unwrap())
        );
    }

    #[test]
    fn get_all_licenses_from_spdx() {
        let document = load_example();

        let actual = document.get_license_ids();

        let expected = HashSet::from_iter([
            String::from("Apache-2.0"),
            String::from("LicenseRef-1"),
            String::from("LGPL-2.0-only"),
            String::from("LicenseRef-2"),
        ]);

        assert_eq!(expected, actual);
    }

    #[test]
    fn get_relationships_for_spdx_id() {
        let document = load_example();

        let relationships = document.relationships_for_spdx_id("SPDXRef-Package");

        let expected = [
            relationship(
                "SPDXRef-Package",
                "SPDXRef-Saxon",
                RelationshipType::DynamicLink,
            ),
            relationship(
                "SPDXRef-Package",
                "SPDXRef-JenaLib",
                RelationshipType::Contains,
            ),
        ];
        let expected_relationships: Vec<&Relationship> = expected.iter().collect();

        assert_eq!(relationships, expected_relationships);
    }

    #[test]
    fn get_relationships_for_related_spdx_id() {
        let document = load_example();

        let relationships = document.relationships_for_related_spdx_id("SPDXRef-Package");

        let expected = [
            relationship(
                "SPDXRef-DOCUMENT",
                "SPDXRef-Package",
                RelationshipType::Contains,
            ),
            relationship(
                "SPDXRef-DOCUMENT",
                "SPDXRef-Package",
                RelationshipType::Describes,
            ),
            relationship(
                "SPDXRef-JenaLib",
                "SPDXRef-Package",
                RelationshipType::Contains,
            ),
        ];
        let expected_relationships: Vec<&Relationship> = expected.iter().collect();

        assert_eq!(relationships, expected_relationships);
    }

    #[test]
    fn get_unique_hashes_for_files() {
        let document = load_example();

        let hashes = document.get_unique_hashes(Algorithm::SHA1);

        let expected = [
            "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12",
            "c2b4e1c67a2d28fced849ee1bb76e7391b93f125",
            "3ab4e1c67a2d28fced849ee1bb76e7391b93f125",
            "d6a770ba38583ed4bb4525bd96e50461655d2758",
        ]
        .iter()
        .map(|hash| hash.to_string())
        .collect::<HashSet<_>>();

        assert_eq!(hashes, expected);
    }
}
333