| 1 | // SPDX-FileCopyrightText: 2022 HH Partners |
| 2 | // |
| 3 | // SPDX-License-Identifier: MIT |
| 4 | |
| 5 | //! Parsing logic for SPDX Expressions. |
| 6 | //! |
| 7 | //! The code is heavily inspired by |
| 8 | //! <https://github.com/Geal/nom/blob/294ffb3d9e0ade2c3b7ddfff52484b6d643dcce1/tests/arithmetic_ast.rs> |
| 9 | //! which is licensed under the MIT License. The source project includes the following copyright |
| 10 | //! statement: Copyright (c) 2014-2019 Geoffroy Couprie. |
| 11 | |
| 12 | use nom::{ |
| 13 | branch::alt, |
| 14 | bytes::complete::{tag, tag_no_case, take_while1}, |
| 15 | character::{ |
| 16 | complete::{multispace0, multispace1}, |
| 17 | streaming::char, |
| 18 | }, |
| 19 | combinator::{complete, map, opt, recognize}, |
| 20 | multi::many0, |
| 21 | sequence::{delimited, pair, preceded, separated_pair}, |
| 22 | AsChar, IResult, |
| 23 | }; |
| 24 | |
| 25 | use crate::expression_variant::{ExpressionVariant, SimpleExpression, WithExpression}; |
| 26 | |
/// Binary operator that joins two sub-expressions.
///
/// Values are produced by `and_expression` / `or_expression` and consumed by
/// `fold_expressions`, which turns each one into the matching `ExpressionVariant` node.
#[derive(Debug)]
enum Operator {
    /// Conjunction, parsed from the `AND` keyword (matched case-insensitively).
    And,
    /// Disjunction, parsed from the `OR` keyword (matched case-insensitively).
    Or,
}
| 32 | |
| 33 | fn parentheses(i: &str) -> IResult<&str, ExpressionVariant> { |
| 34 | delimited( |
| 35 | first:multispace0, |
| 36 | second:delimited( |
| 37 | tag("(" ), |
| 38 | map(or_expression, |e| ExpressionVariant::Parens(Box::new(e))), |
| 39 | tag(")" ), |
| 40 | ), |
| 41 | third:multispace0, |
| 42 | )(i) |
| 43 | } |
| 44 | |
| 45 | fn terminal_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
| 46 | alt(( |
| 47 | delimited(first:multispace0, second:with_expression, third:multispace0), |
| 48 | map( |
| 49 | parser:delimited(multispace0, simple_expression, multispace0), |
| 50 | f:ExpressionVariant::Simple, |
| 51 | ), |
| 52 | parentheses, |
| 53 | ))(i) |
| 54 | } |
| 55 | |
| 56 | fn with_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
| 57 | map( |
| 58 | parser:separated_pair( |
| 59 | simple_expression, |
| 60 | delimited(multispace1, tag_no_case("WITH" ), multispace1), |
| 61 | idstring, |
| 62 | ), |
| 63 | |(lic: SimpleExpression, exc: &str)| ExpressionVariant::With(WithExpression::new(license:lic, exception:exc.to_string())), |
| 64 | )(i) |
| 65 | } |
| 66 | |
| 67 | fn fold_expressions( |
| 68 | initial: ExpressionVariant, |
| 69 | remainder: Vec<(Operator, ExpressionVariant)>, |
| 70 | ) -> ExpressionVariant { |
| 71 | remainder.into_iter().fold(init:initial, |acc: ExpressionVariant, pair: (Operator, ExpressionVariant)| { |
| 72 | let (oper: Operator, expr: ExpressionVariant) = pair; |
| 73 | match oper { |
| 74 | Operator::And => ExpressionVariant::And(Box::new(acc), Box::new(expr)), |
| 75 | Operator::Or => ExpressionVariant::Or(Box::new(acc), Box::new(expr)), |
| 76 | } |
| 77 | }) |
| 78 | } |
| 79 | |
| 80 | fn and_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
| 81 | let (i: &str, initial: ExpressionVariant) = terminal_expression(i)?; |
| 82 | let (i: &str, remainder: Vec<(Operator, ExpressionVariant)>) = many0(|i: &str| { |
| 83 | let (i: &str, and: ExpressionVariant) = preceded(first:tag_no_case("AND" ), second:terminal_expression)(i)?; |
| 84 | Ok((i, (Operator::And, and))) |
| 85 | })(i)?; |
| 86 | |
| 87 | Ok((i, fold_expressions(initial, remainder))) |
| 88 | } |
| 89 | |
| 90 | fn or_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
| 91 | let (i: &str, initial: ExpressionVariant) = and_expression(i)?; |
| 92 | let (i: &str, remainder: Vec<(Operator, ExpressionVariant)>) = many0(|i: &str| { |
| 93 | let (i: &str, or: ExpressionVariant) = preceded(first:tag_no_case("OR" ), second:and_expression)(i)?; |
| 94 | Ok((i, (Operator::Or, or))) |
| 95 | })(i)?; |
| 96 | |
| 97 | Ok((i, fold_expressions(initial, remainder))) |
| 98 | } |
| 99 | |
/// Parses an SPDX license expression from the start of `i`.
///
/// Delegates to `or_expression`, the lowest-precedence rule of the grammar. On success the
/// returned tuple holds the input left over after the expression together with the parsed
/// tree; this function does not itself require the remaining input to be empty.
pub fn parse_expression(i: &str) -> IResult<&str, ExpressionVariant> {
    or_expression(i)
}
| 103 | |
| 104 | fn idstring(i: &str) -> IResult<&str, &str> { |
| 105 | take_while1(|c: char| c.is_alphanum() || c == '-' || c == '.' )(i) |
| 106 | } |
| 107 | |
| 108 | fn license_idstring(i: &str) -> IResult<&str, &str> { |
| 109 | recognize(parser:pair(first:idstring, second:opt(complete(char('+' )))))(i) |
| 110 | } |
| 111 | |
| 112 | fn document_ref(i: &str) -> IResult<&str, &str> { |
| 113 | delimited(first:tag("DocumentRef-" ), second:idstring, third:char(':' ))(i) |
| 114 | } |
| 115 | |
| 116 | fn license_ref(i: &str) -> IResult<&str, (Option<&str>, &str)> { |
| 117 | separated_pair(first:opt(document_ref), sep:tag("LicenseRef-" ), second:idstring)(i) |
| 118 | } |
| 119 | |
| 120 | pub fn simple_expression(i: &str) -> IResult<&str, SimpleExpression> { |
| 121 | alt(( |
| 122 | map(parser:license_ref, |(document_ref: Option<&str>, id: &str)| { |
| 123 | let document_ref: Option = document_ref.map(std::string::ToString::to_string); |
| 124 | SimpleExpression::new(identifier:id.to_string(), document_ref, license_ref:true) |
| 125 | }), |
| 126 | map(parser:license_idstring, |id: &str| { |
| 127 | SimpleExpression::new(identifier:id.to_string(), document_ref:None, license_ref:false) |
| 128 | }), |
| 129 | ))(i) |
| 130 | } |
| 131 | |
#[cfg(test)]
mod tests {
    //! A lot of the test cases for parsing are copied from
    //! <https://github.com/oss-review-toolkit/ort/blob/6eb18b6d36f59c6d7ec221bad1cf5d4cd6acfc8b/utils/spdx/src/test/kotlin/SpdxExpressionParserTest.kt>
    //! which is licensed under the Apache License, Version 2.0 and includes the following copyright
    //! statement:
    //! Copyright (C) 2017-2019 HERE Europe B.V.

    use super::*;

    use pretty_assertions::assert_eq;

    // Small builders for the expected trees, to keep the assertions readable.

    /// A plain license identifier: no document ref, not a license ref.
    fn listed(id: &str) -> SimpleExpression {
        SimpleExpression::new(id.to_string(), None, false)
    }

    /// `listed(id)` wrapped as an expression node.
    fn simple(id: &str) -> ExpressionVariant {
        ExpressionVariant::Simple(listed(id))
    }

    /// `<id> WITH <exception>` over a plain license identifier.
    fn with_exception(id: &str, exception: &str) -> ExpressionVariant {
        ExpressionVariant::With(WithExpression::new(listed(id), exception.to_string()))
    }

    fn and(left: ExpressionVariant, right: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::And(Box::new(left), Box::new(right))
    }

    fn or(left: ExpressionVariant, right: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::Or(Box::new(left), Box::new(right))
    }

    fn parens(inner: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::Parens(Box::new(inner))
    }

    #[test]
    fn parse_a_license_id_correctly() {
        let parsed = ExpressionVariant::parse("spdx.license-id").unwrap();
        assert_eq!(parsed, simple("spdx.license-id"));
    }

    #[test]
    fn parse_a_license_id_starting_with_a_digit_correctly() {
        let parsed = ExpressionVariant::parse("0license").unwrap();
        assert_eq!(parsed, simple("0license"));
    }

    #[test]
    fn parse_a_license_id_with_any_later_version_correctly() {
        let parsed = ExpressionVariant::parse("license+").unwrap();
        assert_eq!(parsed, simple("license+"));
    }

    #[test]
    fn parse_a_document_ref_correctly() {
        let parsed = ExpressionVariant::parse("DocumentRef-document:LicenseRef-license").unwrap();
        let expected = ExpressionVariant::Simple(SimpleExpression::new(
            "license".to_string(),
            Some("document".to_string()),
            true,
        ));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_a_license_ref_correctly() {
        let parsed = ExpressionVariant::parse("LicenseRef-license").unwrap();
        let expected =
            ExpressionVariant::Simple(SimpleExpression::new("license".to_string(), None, true));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_a_with_expression_correctly() {
        let parsed = ExpressionVariant::parse("license WITH exception").unwrap();
        assert_eq!(parsed, with_exception("license", "exception"));
    }

    #[test]
    fn parse_a_complex_expression_correctly() {
        let parsed = ExpressionVariant::parse(
            "license1+ and ((license2 with exception1) OR license3+ AND license4 WITH exception2)",
        )
        .unwrap();

        let expected = and(
            simple("license1+"),
            parens(or(
                parens(with_exception("license2", "exception1")),
                and(simple("license3+"), with_exception("license4", "exception2")),
            )),
        );

        assert_eq!(parsed, expected);
    }

    #[test]
    fn bind_plus_stronger_than_with() {
        let parsed = ExpressionVariant::parse("license+ WITH exception").unwrap();
        assert_eq!(parsed, with_exception("license+", "exception"));
    }

    #[test]
    fn bind_with_stronger_than_and() {
        let parsed = ExpressionVariant::parse("license1 AND license2 WITH exception").unwrap();
        let expected = and(simple("license1"), with_exception("license2", "exception"));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn bind_and_stronger_than_or() {
        let parsed = ExpressionVariant::parse("license1 OR license2 AND license3").unwrap();
        let expected = or(simple("license1"), and(simple("license2"), simple("license3")));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn bind_the_and_operator_left_associative() {
        let parsed = ExpressionVariant::parse("license1 AND license2 AND license3").unwrap();
        let expected = and(and(simple("license1"), simple("license2")), simple("license3"));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn bind_the_or_operator_left_associative() {
        let parsed = ExpressionVariant::parse("license1 OR license2 OR license3").unwrap();
        let expected = or(or(simple("license1"), simple("license2")), simple("license3"));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn respect_parentheses_for_binding_strength_of_operators() {
        let parsed = ExpressionVariant::parse("(license1 OR license2) AND license3").unwrap();
        let expected = and(
            parens(or(simple("license1"), simple("license2"))),
            simple("license3"),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn fail_if_plus_is_used_in_an_exception_expression() {
        let parsed = ExpressionVariant::parse("license WITH exception+");
        assert!(parsed.is_err());
    }

    #[test]
    fn fail_if_a_compound_expressions_is_used_before_with() {
        let parsed = ExpressionVariant::parse("(license1 AND license2) WITH exception");
        assert!(parsed.is_err());
    }

    #[test]
    fn fail_on_an_invalid_symbol() {
        let parsed = ExpressionVariant::parse("/");
        assert!(parsed.is_err());
    }

    #[test]
    fn fail_on_a_syntax_error() {
        let parsed = ExpressionVariant::parse("((");
        assert!(parsed.is_err());
    }
}
| 410 | |