1// SPDX-FileCopyrightText: 2022 HH Partners
2//
3// SPDX-License-Identifier: MIT
4
5//! Parsing logic for SPDX Expressions.
6//!
7//! The code is heavily inspired by
8//! <https://github.com/Geal/nom/blob/294ffb3d9e0ade2c3b7ddfff52484b6d643dcce1/tests/arithmetic_ast.rs>
9//! which is licensed under the MIT License. The source project includes the following copyright
10//! statement: Copyright (c) 2014-2019 Geoffroy Couprie.
11
12use nom::{
13 branch::alt,
14 bytes::complete::{tag, tag_no_case, take_while1},
15 character::{
16 complete::{multispace0, multispace1},
17 streaming::char,
18 },
19 combinator::{complete, map, opt, recognize},
20 multi::many0,
21 sequence::{delimited, pair, preceded, separated_pair},
22 AsChar, IResult,
23};
24
25use crate::expression_variant::{ExpressionVariant, SimpleExpression, WithExpression};
26
/// Binary operator joining two sub-expressions of a compound expression.
///
/// Only used internally while folding a parsed operand list into an
/// [`ExpressionVariant`] tree.
#[derive(Debug)]
enum Operator {
    /// Conjunction; produced when the parser matches the `AND` keyword.
    And,
    /// Disjunction; produced when the parser matches the `OR` keyword.
    Or,
}
32
33fn parentheses(i: &str) -> IResult<&str, ExpressionVariant> {
34 delimited(
35 first:multispace0,
36 second:delimited(
37 tag("("),
38 map(or_expression, |e| ExpressionVariant::Parens(Box::new(e))),
39 tag(")"),
40 ),
41 third:multispace0,
42 )(i)
43}
44
45fn terminal_expression(i: &str) -> IResult<&str, ExpressionVariant> {
46 alt((
47 delimited(first:multispace0, second:with_expression, third:multispace0),
48 map(
49 parser:delimited(multispace0, simple_expression, multispace0),
50 f:ExpressionVariant::Simple,
51 ),
52 parentheses,
53 ))(i)
54}
55
56fn with_expression(i: &str) -> IResult<&str, ExpressionVariant> {
57 map(
58 parser:separated_pair(
59 simple_expression,
60 delimited(multispace1, tag_no_case("WITH"), multispace1),
61 idstring,
62 ),
63 |(lic: SimpleExpression, exc: &str)| ExpressionVariant::With(WithExpression::new(license:lic, exception:exc.to_string())),
64 )(i)
65}
66
67fn fold_expressions(
68 initial: ExpressionVariant,
69 remainder: Vec<(Operator, ExpressionVariant)>,
70) -> ExpressionVariant {
71 remainder.into_iter().fold(init:initial, |acc: ExpressionVariant, pair: (Operator, ExpressionVariant)| {
72 let (oper: Operator, expr: ExpressionVariant) = pair;
73 match oper {
74 Operator::And => ExpressionVariant::And(Box::new(acc), Box::new(expr)),
75 Operator::Or => ExpressionVariant::Or(Box::new(acc), Box::new(expr)),
76 }
77 })
78}
79
80fn and_expression(i: &str) -> IResult<&str, ExpressionVariant> {
81 let (i: &str, initial: ExpressionVariant) = terminal_expression(i)?;
82 let (i: &str, remainder: Vec<(Operator, ExpressionVariant)>) = many0(|i: &str| {
83 let (i: &str, and: ExpressionVariant) = preceded(first:tag_no_case("AND"), second:terminal_expression)(i)?;
84 Ok((i, (Operator::And, and)))
85 })(i)?;
86
87 Ok((i, fold_expressions(initial, remainder)))
88}
89
90fn or_expression(i: &str) -> IResult<&str, ExpressionVariant> {
91 let (i: &str, initial: ExpressionVariant) = and_expression(i)?;
92 let (i: &str, remainder: Vec<(Operator, ExpressionVariant)>) = many0(|i: &str| {
93 let (i: &str, or: ExpressionVariant) = preceded(first:tag_no_case("OR"), second:and_expression)(i)?;
94 Ok((i, (Operator::Or, or)))
95 })(i)?;
96
97 Ok((i, fold_expressions(initial, remainder)))
98}
99
100pub fn parse_expression(i: &str) -> IResult<&str, ExpressionVariant> {
101 or_expression(i)
102}
103
104fn idstring(i: &str) -> IResult<&str, &str> {
105 take_while1(|c: char| c.is_alphanum() || c == '-' || c == '.')(i)
106}
107
108fn license_idstring(i: &str) -> IResult<&str, &str> {
109 recognize(parser:pair(first:idstring, second:opt(complete(char('+')))))(i)
110}
111
112fn document_ref(i: &str) -> IResult<&str, &str> {
113 delimited(first:tag("DocumentRef-"), second:idstring, third:char(':'))(i)
114}
115
116fn license_ref(i: &str) -> IResult<&str, (Option<&str>, &str)> {
117 separated_pair(first:opt(document_ref), sep:tag("LicenseRef-"), second:idstring)(i)
118}
119
120pub fn simple_expression(i: &str) -> IResult<&str, SimpleExpression> {
121 alt((
122 map(parser:license_ref, |(document_ref: Option<&str>, id: &str)| {
123 let document_ref: Option = document_ref.map(std::string::ToString::to_string);
124 SimpleExpression::new(identifier:id.to_string(), document_ref, license_ref:true)
125 }),
126 map(parser:license_idstring, |id: &str| {
127 SimpleExpression::new(identifier:id.to_string(), document_ref:None, license_ref:false)
128 }),
129 ))(i)
130}
131
#[cfg(test)]
mod tests {
    //! A lot of the test cases for parsing are copied from
    //! <https://github.com/oss-review-toolkit/ort/blob/6eb18b6d36f59c6d7ec221bad1cf5d4cd6acfc8b/utils/spdx/src/test/kotlin/SpdxExpressionParserTest.kt>
    //! which is licensed under the Apache License, Version 2.0 and includes the following copyright
    //! statement:
    //! Copyright (C) 2017-2019 HERE Europe B.V.

    use super::*;

    use pretty_assertions::assert_eq;

    /// Expected tree for a plain license identifier (no document ref, not a license ref).
    fn simple(id: &str) -> ExpressionVariant {
        ExpressionVariant::Simple(SimpleExpression::new(id.to_string(), None, false))
    }

    /// Expected tree for `<license> WITH <exception>` with a plain license identifier.
    fn with(license: &str, exception: &str) -> ExpressionVariant {
        ExpressionVariant::With(WithExpression::new(
            SimpleExpression::new(license.to_string(), None, false),
            exception.to_string(),
        ))
    }

    /// Expected tree for `lhs AND rhs`.
    fn and(lhs: ExpressionVariant, rhs: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::And(Box::new(lhs), Box::new(rhs))
    }

    /// Expected tree for `lhs OR rhs`.
    fn or(lhs: ExpressionVariant, rhs: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::Or(Box::new(lhs), Box::new(rhs))
    }

    /// Expected tree for a parenthesised expression.
    fn parens(inner: ExpressionVariant) -> ExpressionVariant {
        ExpressionVariant::Parens(Box::new(inner))
    }

    #[test]
    fn parse_a_license_id_correctly() {
        let parsed = ExpressionVariant::parse("spdx.license-id").unwrap();
        assert_eq!(parsed, simple("spdx.license-id"));
    }

    #[test]
    fn parse_a_license_id_starting_with_a_digit_correctly() {
        let parsed = ExpressionVariant::parse("0license").unwrap();
        assert_eq!(parsed, simple("0license"));
    }

    #[test]
    fn parse_a_license_id_with_any_later_version_correctly() {
        let parsed = ExpressionVariant::parse("license+").unwrap();
        assert_eq!(parsed, simple("license+"));
    }

    #[test]
    fn parse_a_document_ref_correctly() {
        let parsed = ExpressionVariant::parse("DocumentRef-document:LicenseRef-license").unwrap();
        let expected = ExpressionVariant::Simple(SimpleExpression::new(
            "license".to_string(),
            Some("document".to_string()),
            true,
        ));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_a_license_ref_correctly() {
        let parsed = ExpressionVariant::parse("LicenseRef-license").unwrap();
        let expected =
            ExpressionVariant::Simple(SimpleExpression::new("license".to_string(), None, true));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_a_with_expression_correctly() {
        let parsed = ExpressionVariant::parse("license WITH exception").unwrap();
        assert_eq!(parsed, with("license", "exception"));
    }

    #[test]
    fn parse_a_complex_expression_correctly() {
        let parsed = ExpressionVariant::parse(
            "license1+ and ((license2 with exception1) OR license3+ AND license4 WITH exception2)",
        )
        .unwrap();

        let expected = and(
            simple("license1+"),
            parens(or(
                parens(with("license2", "exception1")),
                and(simple("license3+"), with("license4", "exception2")),
            )),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn bind_plus_stronger_than_with() {
        let parsed = ExpressionVariant::parse("license+ WITH exception").unwrap();
        assert_eq!(parsed, with("license+", "exception"));
    }

    #[test]
    fn bind_with_stronger_than_and() {
        let parsed = ExpressionVariant::parse("license1 AND license2 WITH exception").unwrap();
        assert_eq!(
            parsed,
            and(simple("license1"), with("license2", "exception"))
        );
    }

    #[test]
    fn bind_and_stronger_than_or() {
        let parsed = ExpressionVariant::parse("license1 OR license2 AND license3").unwrap();
        assert_eq!(
            parsed,
            or(
                simple("license1"),
                and(simple("license2"), simple("license3"))
            )
        );
    }

    #[test]
    fn bind_the_and_operator_left_associative() {
        let parsed = ExpressionVariant::parse("license1 AND license2 AND license3").unwrap();
        assert_eq!(
            parsed,
            and(
                and(simple("license1"), simple("license2")),
                simple("license3")
            )
        );
    }

    #[test]
    fn bind_the_or_operator_left_associative() {
        let parsed = ExpressionVariant::parse("license1 OR license2 OR license3").unwrap();
        assert_eq!(
            parsed,
            or(
                or(simple("license1"), simple("license2")),
                simple("license3")
            )
        );
    }

    #[test]
    fn respect_parentheses_for_binding_strength_of_operators() {
        let parsed = ExpressionVariant::parse("(license1 OR license2) AND license3").unwrap();
        assert_eq!(
            parsed,
            and(
                parens(or(simple("license1"), simple("license2"))),
                simple("license3")
            )
        );
    }

    #[test]
    fn fail_if_plus_is_used_in_an_exception_expression() {
        assert!(ExpressionVariant::parse("license WITH exception+").is_err());
    }

    #[test]
    fn fail_if_a_compound_expressions_is_used_before_with() {
        assert!(ExpressionVariant::parse("(license1 AND license2) WITH exception").is_err());
    }

    #[test]
    fn fail_on_an_invalid_symbol() {
        assert!(ExpressionVariant::parse("/").is_err());
    }

    #[test]
    fn fail_on_a_syntax_error() {
        assert!(ExpressionVariant::parse("((").is_err());
    }
}
410