1 | // SPDX-FileCopyrightText: 2022 HH Partners |
2 | // |
3 | // SPDX-License-Identifier: MIT |
4 | |
5 | //! Parsing logic for SPDX Expressions. |
6 | //! |
7 | //! The code is heavily inspired by |
8 | //! <https://github.com/Geal/nom/blob/294ffb3d9e0ade2c3b7ddfff52484b6d643dcce1/tests/arithmetic_ast.rs> |
9 | //! which is licensed under the MIT License. The source project includes the following copyright |
10 | //! statement: Copyright (c) 2014-2019 Geoffroy Couprie. |
11 | |
12 | use nom::{ |
13 | branch::alt, |
14 | bytes::complete::{tag, tag_no_case, take_while1}, |
15 | character::{ |
16 | complete::{multispace0, multispace1}, |
17 | streaming::char, |
18 | }, |
19 | combinator::{complete, map, opt, recognize}, |
20 | multi::many0, |
21 | sequence::{delimited, pair, preceded, separated_pair}, |
22 | AsChar, IResult, |
23 | }; |
24 | |
25 | use crate::expression_variant::{ExpressionVariant, SimpleExpression, WithExpression}; |
26 | |
/// Binary operator joining two sub-expressions while a compound expression is being parsed.
///
/// Values of this type only live between parsing and folding: `fold_expressions` turns each
/// one into the corresponding `ExpressionVariant` node.
#[derive(Debug)]
enum Operator {
    /// Produced for the `AND` keyword.
    And,
    /// Produced for the `OR` keyword.
    Or,
}
32 | |
33 | fn parentheses(i: &str) -> IResult<&str, ExpressionVariant> { |
34 | delimited( |
35 | first:multispace0, |
36 | second:delimited( |
37 | tag("(" ), |
38 | map(or_expression, |e| ExpressionVariant::Parens(Box::new(e))), |
39 | tag(")" ), |
40 | ), |
41 | third:multispace0, |
42 | )(i) |
43 | } |
44 | |
45 | fn terminal_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
46 | alt(( |
47 | delimited(first:multispace0, second:with_expression, third:multispace0), |
48 | map( |
49 | parser:delimited(multispace0, simple_expression, multispace0), |
50 | f:ExpressionVariant::Simple, |
51 | ), |
52 | parentheses, |
53 | ))(i) |
54 | } |
55 | |
56 | fn with_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
57 | map( |
58 | parser:separated_pair( |
59 | simple_expression, |
60 | delimited(multispace1, tag_no_case("WITH" ), multispace1), |
61 | idstring, |
62 | ), |
63 | |(lic: SimpleExpression, exc: &str)| ExpressionVariant::With(WithExpression::new(license:lic, exception:exc.to_string())), |
64 | )(i) |
65 | } |
66 | |
67 | fn fold_expressions( |
68 | initial: ExpressionVariant, |
69 | remainder: Vec<(Operator, ExpressionVariant)>, |
70 | ) -> ExpressionVariant { |
71 | remainder.into_iter().fold(init:initial, |acc: ExpressionVariant, pair: (Operator, ExpressionVariant)| { |
72 | let (oper: Operator, expr: ExpressionVariant) = pair; |
73 | match oper { |
74 | Operator::And => ExpressionVariant::And(Box::new(acc), Box::new(expr)), |
75 | Operator::Or => ExpressionVariant::Or(Box::new(acc), Box::new(expr)), |
76 | } |
77 | }) |
78 | } |
79 | |
80 | fn and_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
81 | let (i: &str, initial: ExpressionVariant) = terminal_expression(i)?; |
82 | let (i: &str, remainder: Vec<(Operator, ExpressionVariant)>) = many0(|i: &str| { |
83 | let (i: &str, and: ExpressionVariant) = preceded(first:tag_no_case("AND" ), second:terminal_expression)(i)?; |
84 | Ok((i, (Operator::And, and))) |
85 | })(i)?; |
86 | |
87 | Ok((i, fold_expressions(initial, remainder))) |
88 | } |
89 | |
90 | fn or_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
91 | let (i: &str, initial: ExpressionVariant) = and_expression(i)?; |
92 | let (i: &str, remainder: Vec<(Operator, ExpressionVariant)>) = many0(|i: &str| { |
93 | let (i: &str, or: ExpressionVariant) = preceded(first:tag_no_case("OR" ), second:and_expression)(i)?; |
94 | Ok((i, (Operator::Or, or))) |
95 | })(i)?; |
96 | |
97 | Ok((i, fold_expressions(initial, remainder))) |
98 | } |
99 | |
100 | pub fn parse_expression(i: &str) -> IResult<&str, ExpressionVariant> { |
101 | or_expression(i) |
102 | } |
103 | |
104 | fn idstring(i: &str) -> IResult<&str, &str> { |
105 | take_while1(|c: char| c.is_alphanum() || c == '-' || c == '.' )(i) |
106 | } |
107 | |
108 | fn license_idstring(i: &str) -> IResult<&str, &str> { |
109 | recognize(parser:pair(first:idstring, second:opt(complete(char('+' )))))(i) |
110 | } |
111 | |
112 | fn document_ref(i: &str) -> IResult<&str, &str> { |
113 | delimited(first:tag("DocumentRef-" ), second:idstring, third:char(':' ))(i) |
114 | } |
115 | |
116 | fn license_ref(i: &str) -> IResult<&str, (Option<&str>, &str)> { |
117 | separated_pair(first:opt(document_ref), sep:tag("LicenseRef-" ), second:idstring)(i) |
118 | } |
119 | |
120 | pub fn simple_expression(i: &str) -> IResult<&str, SimpleExpression> { |
121 | alt(( |
122 | map(parser:license_ref, |(document_ref: Option<&str>, id: &str)| { |
123 | let document_ref: Option = document_ref.map(std::string::ToString::to_string); |
124 | SimpleExpression::new(identifier:id.to_string(), document_ref, license_ref:true) |
125 | }), |
126 | map(parser:license_idstring, |id: &str| { |
127 | SimpleExpression::new(identifier:id.to_string(), document_ref:None, license_ref:false) |
128 | }), |
129 | ))(i) |
130 | } |
131 | |
#[cfg(test)]
mod tests {
    //! A lot of the test cases for parsing are copied from
    //! <https://github.com/oss-review-toolkit/ort/blob/6eb18b6d36f59c6d7ec221bad1cf5d4cd6acfc8b/utils/spdx/src/test/kotlin/SpdxExpressionParserTest.kt>
    //! which is licensed under the Apache License, Version 2.0 and includes the following copyright
    //! statement:
    //! Copyright (C) 2017-2019 HERE Europe B.V.

    use super::*;

    use pretty_assertions::assert_eq;

    /// Expected value for a plain license identifier (no document ref, not a license ref).
    fn simple(id: &str) -> ExpressionVariant {
        ExpressionVariant::Simple(SimpleExpression::new(id.to_string(), None, false))
    }

    /// Expected value for a `LicenseRef-` expression with an optional document reference.
    fn license_reference(id: &str, document: Option<&str>) -> ExpressionVariant {
        ExpressionVariant::Simple(SimpleExpression::new(
            id.to_string(),
            document.map(str::to_string),
            true,
        ))
    }

    /// Expected value for `<license> WITH <exception>` with a plain license identifier.
    fn with_exception(license: &str, exception: &str) -> ExpressionVariant {
        ExpressionVariant::With(WithExpression::new(
            SimpleExpression::new(license.to_string(), None, false),
            exception.to_string(),
        ))
    }

    /// Expected value for `lhs AND rhs`.
    fn and(lhs: ExpressionVariant, rhs: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::And(Box::new(lhs), Box::new(rhs))
    }

    /// Expected value for `lhs OR rhs`.
    fn or(lhs: ExpressionVariant, rhs: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::Or(Box::new(lhs), Box::new(rhs))
    }

    /// Expected value for a parenthesised expression.
    fn parens(inner: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::Parens(Box::new(inner))
    }

    #[test]
    fn parse_a_license_id_correctly() {
        assert_eq!(
            ExpressionVariant::parse("spdx.license-id").unwrap(),
            simple("spdx.license-id")
        );
    }

    #[test]
    fn parse_a_license_id_starting_with_a_digit_correctly() {
        assert_eq!(
            ExpressionVariant::parse("0license").unwrap(),
            simple("0license")
        );
    }

    #[test]
    fn parse_a_license_id_with_any_later_version_correctly() {
        assert_eq!(
            ExpressionVariant::parse("license+").unwrap(),
            simple("license+")
        );
    }

    #[test]
    fn parse_a_document_ref_correctly() {
        assert_eq!(
            ExpressionVariant::parse("DocumentRef-document:LicenseRef-license").unwrap(),
            license_reference("license", Some("document"))
        );
    }

    #[test]
    fn parse_a_license_ref_correctly() {
        assert_eq!(
            ExpressionVariant::parse("LicenseRef-license").unwrap(),
            license_reference("license", None)
        );
    }

    #[test]
    fn parse_a_with_expression_correctly() {
        assert_eq!(
            ExpressionVariant::parse("license WITH exception").unwrap(),
            with_exception("license", "exception")
        );
    }

    #[test]
    fn parse_a_complex_expression_correctly() {
        let expected = and(
            simple("license1+"),
            parens(or(
                parens(with_exception("license2", "exception1")),
                and(
                    simple("license3+"),
                    with_exception("license4", "exception2"),
                ),
            )),
        );

        assert_eq!(
            ExpressionVariant::parse(
                "license1+ and ((license2 with exception1) OR license3+ AND license4 WITH exception2)",
            )
            .unwrap(),
            expected
        );
    }

    #[test]
    fn bind_plus_stronger_than_with() {
        assert_eq!(
            ExpressionVariant::parse("license+ WITH exception").unwrap(),
            with_exception("license+", "exception")
        );
    }

    #[test]
    fn bind_with_stronger_than_and() {
        assert_eq!(
            ExpressionVariant::parse("license1 AND license2 WITH exception").unwrap(),
            and(simple("license1"), with_exception("license2", "exception"))
        );
    }

    #[test]
    fn bind_and_stronger_than_or() {
        assert_eq!(
            ExpressionVariant::parse("license1 OR license2 AND license3").unwrap(),
            or(
                simple("license1"),
                and(simple("license2"), simple("license3"))
            )
        );
    }

    #[test]
    fn bind_the_and_operator_left_associative() {
        assert_eq!(
            ExpressionVariant::parse("license1 AND license2 AND license3").unwrap(),
            and(
                and(simple("license1"), simple("license2")),
                simple("license3")
            )
        );
    }

    #[test]
    fn bind_the_or_operator_left_associative() {
        assert_eq!(
            ExpressionVariant::parse("license1 OR license2 OR license3").unwrap(),
            or(
                or(simple("license1"), simple("license2")),
                simple("license3")
            )
        );
    }

    #[test]
    fn respect_parentheses_for_binding_strength_of_operators() {
        assert_eq!(
            ExpressionVariant::parse("(license1 OR license2) AND license3").unwrap(),
            and(
                parens(or(simple("license1"), simple("license2"))),
                simple("license3")
            )
        );
    }

    #[test]
    fn fail_if_plus_is_used_in_an_exception_expression() {
        assert!(ExpressionVariant::parse("license WITH exception+").is_err());
    }

    #[test]
    fn fail_if_a_compound_expressions_is_used_before_with() {
        assert!(ExpressionVariant::parse("(license1 AND license2) WITH exception").is_err());
    }

    #[test]
    fn fail_on_an_invalid_symbol() {
        assert!(ExpressionVariant::parse("/").is_err());
    }

    #[test]
    fn fail_on_a_syntax_error() {
        assert!(ExpressionVariant::parse("((").is_err());
    }
}
410 | |