| 1 | // SPDX-FileCopyrightText: 2021 HH Partners |
| 2 | // |
| 3 | // SPDX-License-Identifier: MIT |
| 4 | |
| 5 | use std::collections::HashSet; |
| 6 | |
| 7 | use log::info; |
| 8 | use serde::{Deserialize, Serialize}; |
| 9 | use uuid::Uuid; |
| 10 | |
| 11 | use super::{ |
| 12 | Algorithm, Annotation, DocumentCreationInformation, FileInformation, |
| 13 | OtherLicensingInformationDetected, PackageInformation, Relationship, Snippet, |
| 14 | }; |
| 15 | |
| 16 | /// A representation of an [SPDX Document] |
| 17 | /// |
| 18 | /// This is the main struct of this crate. The struct implements [`Serialize`] and [`Deserialize`] |
| 19 | /// to allow it to be serialized into and deserialized from any data format supported by [Serde]. |
| 20 | /// |
| 21 | /// # SPDX specification version |
| 22 | /// |
| 23 | /// The crate has been developed around SPDX version 2.2.1. Fields deprecated in 2.2.1, like |
| 24 | /// [review information] are not supported. The plan is to support newer versions as they are |
| 25 | /// released. |
| 26 | /// |
| 27 | /// # Data formats |
| 28 | /// |
| 29 | /// The crate has been developed for usage with JSON SPDX documents. The naming of the fields should |
| 30 | /// conform to the spec for at least JSON. Other formats, like YAML may work, but no guarantees are |
| 31 | /// made. |
| 32 | /// |
| 33 | /// The crate also allows for deserializing the struct from SPDX documents in [tag-value format] |
| 34 | /// with [`crate::parsers::spdx_from_tag_value`]. |
| 35 | /// |
| 36 | /// [SPDX Document]: https://spdx.github.io/spdx-spec/composition-of-an-SPDX-document/ |
| 37 | /// [Serde]: https://serde.rs |
| 38 | /// [review information]: https://spdx.github.io/spdx-spec/review-information-deprecated/ |
| 39 | /// [tag-value format]: https://spdx.github.io/spdx-spec/conformance/ |
| 40 | #[derive (Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] |
| 41 | #[serde(rename_all = "camelCase" , deny_unknown_fields)] |
| 42 | pub struct SPDX { |
| 43 | /// <https://spdx.github.io/spdx-spec/2-document-creation-information/> |
| 44 | #[serde(flatten)] |
| 45 | pub document_creation_information: DocumentCreationInformation, |
| 46 | |
| 47 | /// <https://spdx.github.io/spdx-spec/3-package-information/> |
| 48 | #[serde(rename = "packages" )] |
| 49 | #[serde(default)] |
| 50 | pub package_information: Vec<PackageInformation>, |
| 51 | |
| 52 | /// <https://spdx.github.io/spdx-spec/6-other-licensing-information-detected/> |
| 53 | #[serde(rename = "hasExtractedLicensingInfos" )] |
| 54 | #[serde(default)] |
| 55 | pub other_licensing_information_detected: Vec<OtherLicensingInformationDetected>, |
| 56 | |
| 57 | /// <https://spdx.github.io/spdx-spec/4-file-information/> |
| 58 | #[serde(rename = "files" )] |
| 59 | #[serde(default)] |
| 60 | pub file_information: Vec<FileInformation>, |
| 61 | |
| 62 | /// <https://spdx.github.io/spdx-spec/5-snippet-information/> |
| 63 | #[serde(rename = "snippets" )] |
| 64 | #[serde(default)] |
| 65 | pub snippet_information: Vec<Snippet>, |
| 66 | |
| 67 | /// <https://spdx.github.io/spdx-spec/7-relationships-between-SPDX-elements/> |
| 68 | #[serde(default)] |
| 69 | pub relationships: Vec<Relationship>, |
| 70 | |
| 71 | /// <https://spdx.github.io/spdx-spec/8-annotations/> |
| 72 | #[serde(default)] |
| 73 | pub annotations: Vec<Annotation>, |
| 74 | |
| 75 | /// Counter for creating SPDXRefs. Is not part of the spec, so don't serialize. |
| 76 | #[serde(skip)] |
| 77 | pub spdx_ref_counter: i32, |
| 78 | } |
| 79 | |
| 80 | impl SPDX { |
| 81 | /// Create new SPDX struct. |
| 82 | pub fn new(name: &str) -> Self { |
| 83 | info!("Creating SPDX." ); |
| 84 | |
| 85 | Self { |
| 86 | document_creation_information: DocumentCreationInformation { |
| 87 | document_name: name.to_string(), |
| 88 | spdx_document_namespace: format!( |
| 89 | "http://spdx.org/spdxdocs/ {}- {}" , |
| 90 | name, |
| 91 | Uuid::new_v4() |
| 92 | ), |
| 93 | ..DocumentCreationInformation::default() |
| 94 | }, |
| 95 | package_information: Vec::new(), |
| 96 | other_licensing_information_detected: Vec::new(), |
| 97 | file_information: Vec::new(), |
| 98 | relationships: Vec::new(), |
| 99 | spdx_ref_counter: 0, |
| 100 | annotations: Vec::new(), |
| 101 | snippet_information: Vec::new(), |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | /// Get unique hashes for all files the SPDX. |
| 106 | pub fn get_unique_hashes(&self, algorithm: Algorithm) -> HashSet<String> { |
| 107 | info!("Getting unique hashes for files in SPDX." ); |
| 108 | |
| 109 | let mut unique_hashes: HashSet<String> = HashSet::new(); |
| 110 | |
| 111 | for file_information in &self.file_information { |
| 112 | if let Some(checksum) = file_information.checksum(algorithm) { |
| 113 | unique_hashes.insert(checksum.to_string()); |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | unique_hashes |
| 118 | } |
| 119 | |
| 120 | /// Find related files of the package with the provided id. |
| 121 | pub fn get_files_for_package( |
| 122 | &self, |
| 123 | package_spdx_id: &str, |
| 124 | ) -> Vec<(&FileInformation, &Relationship)> { |
| 125 | info!("Finding related files for package {}." , &package_spdx_id); |
| 126 | |
| 127 | let relationships = self |
| 128 | .relationships |
| 129 | .iter() |
| 130 | .filter(|relationship| relationship.spdx_element_id == package_spdx_id); |
| 131 | |
| 132 | let mut result: Vec<(&FileInformation, &Relationship)> = Vec::new(); |
| 133 | |
| 134 | for relationship in relationships { |
| 135 | let file = self |
| 136 | .file_information |
| 137 | .iter() |
| 138 | .find(|file| file.file_spdx_identifier == relationship.related_spdx_element); |
| 139 | if let Some(file) = file { |
| 140 | result.push((file, relationship)); |
| 141 | }; |
| 142 | } |
| 143 | |
| 144 | result |
| 145 | } |
| 146 | |
| 147 | /// Get all license identifiers from the SPDX. |
| 148 | /// |
| 149 | /// # Errors |
| 150 | /// |
| 151 | /// Returns [`SpdxError`] if parsing of the expressions fails. |
| 152 | pub fn get_license_ids(&self) -> HashSet<String> { |
| 153 | info!("Getting all license identifiers from SPDX." ); |
| 154 | |
| 155 | let mut license_ids = HashSet::new(); |
| 156 | |
| 157 | for file in &self.file_information { |
| 158 | if let Some(concluded_license) = &file.concluded_license { |
| 159 | for license in concluded_license.identifiers() { |
| 160 | if license != "NOASSERTION" && license != "NONE" { |
| 161 | license_ids.insert(license.clone()); |
| 162 | } |
| 163 | } |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | license_ids |
| 168 | } |
| 169 | |
| 170 | /// Get all relationships where the given SPDX ID is the SPDX element id. |
| 171 | pub fn relationships_for_spdx_id(&self, spdx_id: &str) -> Vec<&Relationship> { |
| 172 | self.relationships |
| 173 | .iter() |
| 174 | .filter(|relationship| relationship.spdx_element_id == spdx_id) |
| 175 | .collect() |
| 176 | } |
| 177 | |
| 178 | /// Get all relationships where the given SPDX ID is the related SPDX element id. |
| 179 | pub fn relationships_for_related_spdx_id(&self, spdx_id: &str) -> Vec<&Relationship> { |
| 180 | self.relationships |
| 181 | .iter() |
| 182 | .filter(|relationship| relationship.related_spdx_element == spdx_id) |
| 183 | .collect() |
| 184 | } |
| 185 | } |
| 186 | |
#[cfg(test)]
mod test {
    use std::{fs::read_to_string, iter::FromIterator};

    use spdx_expression::SpdxExpression;

    use crate::models::RelationshipType;

    use super::*;

    /// Deserialize the SPDX 2.2 JSON example document that all tests in this module use.
    fn load_example_spdx() -> SPDX {
        let json = read_to_string("tests/data/SPDXJSONExample-v2.2.spdx.json").unwrap();

        serde_json::from_str(&json).unwrap()
    }

    /// Build a relationship without a comment between the two given SPDX ids.
    fn relationship(
        spdx_element_id: &str,
        related_spdx_element: &str,
        relationship_type: RelationshipType,
    ) -> Relationship {
        Relationship {
            spdx_element_id: spdx_element_id.into(),
            related_spdx_element: related_spdx_element.into(),
            relationship_type,
            comment: None,
        }
    }

    #[test]
    fn deserialize_simple_spdx() {
        let spdx_file = load_example_spdx();

        assert_eq!(
            spdx_file.document_creation_information.document_name,
            "SPDX-Tools-v2.0".to_string()
        );
    }

    #[test]
    fn find_related_files_for_package() {
        let spdx_file = load_example_spdx();

        let package_1_files = spdx_file.get_files_for_package("SPDXRef-Package");

        assert_eq!(package_1_files.len(), 1);

        let (file, relationship) = package_1_files
            .iter()
            .find(|entry| entry.0.file_name == "./lib-source/jena-2.6.3-sources.jar")
            .expect("Should always be found");

        assert_eq!(file.file_spdx_identifier, "SPDXRef-JenaLib");
        assert_eq!(relationship.relationship_type, RelationshipType::Contains);

        assert_eq!(
            file.concluded_license,
            Some(SpdxExpression::parse("LicenseRef-1").unwrap())
        );
    }

    #[test]
    fn get_all_licenses_from_spdx() {
        let spdx_file = load_example_spdx();

        let actual = spdx_file.get_license_ids();

        let expected = HashSet::from_iter([
            "Apache-2.0".into(),
            "LicenseRef-1".into(),
            "LGPL-2.0-only".into(),
            "LicenseRef-2".into(),
        ]);

        assert_eq!(expected, actual);
    }

    #[test]
    fn get_relationships_for_spdx_id() {
        let spdx_file = load_example_spdx();

        let relationships = spdx_file.relationships_for_spdx_id("SPDXRef-Package");

        let relationship_1 = relationship(
            "SPDXRef-Package",
            "SPDXRef-Saxon",
            RelationshipType::DynamicLink,
        );
        let relationship_2 = relationship(
            "SPDXRef-Package",
            "SPDXRef-JenaLib",
            RelationshipType::Contains,
        );
        let expected_relationships = vec![&relationship_1, &relationship_2];

        assert_eq!(relationships, expected_relationships);
    }

    #[test]
    fn get_relationships_for_related_spdx_id() {
        let spdx_file = load_example_spdx();

        let relationships = spdx_file.relationships_for_related_spdx_id("SPDXRef-Package");

        let relationship_1 = relationship(
            "SPDXRef-DOCUMENT",
            "SPDXRef-Package",
            RelationshipType::Contains,
        );
        let relationship_2 = relationship(
            "SPDXRef-DOCUMENT",
            "SPDXRef-Package",
            RelationshipType::Describes,
        );
        let relationship_3 = relationship(
            "SPDXRef-JenaLib",
            "SPDXRef-Package",
            RelationshipType::Contains,
        );
        let expected_relationships = vec![&relationship_1, &relationship_2, &relationship_3];

        assert_eq!(relationships, expected_relationships);
    }

    #[test]
    fn get_unique_hashes_for_files() {
        let spdx_file = load_example_spdx();

        let hashes = spdx_file.get_unique_hashes(Algorithm::SHA1);

        let expected: HashSet<String> = [
            "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12",
            "c2b4e1c67a2d28fced849ee1bb76e7391b93f125",
            "3ab4e1c67a2d28fced849ee1bb76e7391b93f125",
            "d6a770ba38583ed4bb4525bd96e50461655d2758",
        ]
        .iter()
        .map(|hash| hash.to_string())
        .collect();

        assert_eq!(hashes, expected);
    }
}
| 333 | |