1use crate::{
2 error::{ParseError, Reason},
3 expression::{ExprNode, Expression, ExpressionReq, Operator},
4 lexer::{Lexer, Token},
5 LicenseItem, LicenseReq, ParseMode,
6};
7use smallvec::SmallVec;
8
9impl Expression {
10 /// Given a license expression, attempts to parse and validate it as a valid
11 /// SPDX expression. Uses `ParseMode::Strict`.
12 ///
13 /// The validation can fail for many reasons:
14 /// * The expression contains invalid characters
15 /// * An unknown/invalid license or exception identifier was found. Only
16 /// [SPDX short identifiers](https://spdx.org/ids) are allowed
17 /// * The expression contained unbalanced parentheses
18 /// * A license or exception immediately follows another license or exception, without
19 /// a valid AND, OR, or WITH operator separating them
20 /// * An AND, OR, or WITH doesn't have a license or `)` preceding it
21 ///
22 /// ```
23 /// spdx::Expression::parse("MIT OR Apache-2.0 WITH LLVM-exception").unwrap();
24 /// ```
25 pub fn parse(original: &str) -> Result<Self, ParseError> {
26 Self::parse_mode(original, ParseMode::STRICT)
27 }
28
29 /// Canonicalizes the input expression into a form that can be parsed with
30 /// [`ParseMode::STRICT`]
31 ///
32 /// ## Transforms
33 ///
34 /// 1. '/' is replaced with ' OR '
35 /// 1. Lower-cased operators ('or', 'and', 'with') are upper-cased
36 /// 1. '+' is tranformed to `-or-later` for GNU licenses
37 /// 1. Invalid/imprecise license identifiers (eg. `apache2`) are replaced
38 /// with their valid identifiers
39 ///
40 /// If the provided expression is not modified then `None` is returned
41 ///
42 /// Note that this only does fixup of otherwise valid expressions, passing
43 /// the resulting string to [`Expression::parse`] can still result in
44 /// additional parse errors, eg. unbalanced parentheses
45 ///
46 /// ```
47 /// assert_eq!(spdx::Expression::canonicalize("apache with LLVM-exception/gpl-3.0+").unwrap().unwrap(), "Apache-2.0 WITH LLVM-exception OR GPL-3.0-or-later");
48 /// ```
49 pub fn canonicalize(original: &str) -> Result<Option<String>, ParseError> {
50 let mut can = String::with_capacity(original.len());
51
52 let lexer = Lexer::new_mode(original, ParseMode::LAX);
53
54 // Keep track if the last license id is a GNU license that uses the -or-later
55 // convention rather than the + like all other licenses
56 let mut last_is_gnu = false;
57 for tok in lexer {
58 let tok = tok?;
59
60 match tok.token {
61 Token::Spdx(id) => {
62 last_is_gnu = id.is_gnu();
63 can.push_str(id.name);
64 }
65 Token::And => can.push_str(" AND "),
66 Token::Or => can.push_str(" OR "),
67 Token::With => can.push_str(" WITH "),
68 Token::Plus => {
69 if last_is_gnu {
70 can.push_str("-or-later");
71 } else {
72 can.push('+');
73 }
74 }
75 Token::OpenParen => can.push('('),
76 Token::CloseParen => can.push(')'),
77 Token::Exception(exc) => can.push_str(exc.name),
78 Token::LicenseRef { doc_ref, lic_ref } => {
79 if let Some(dr) = doc_ref {
80 can.push_str("DocumentRef-");
81 can.push_str(dr);
82 can.push(':');
83 }
84
85 can.push_str("LicenseRef-");
86 can.push_str(lic_ref);
87 }
88 }
89 }
90
91 Ok((can != original).then_some(can))
92 }
93
94 /// Parses an expression with the specified `ParseMode`. With
95 /// `ParseMode::Lax` it permits some non-SPDX syntax, such as imprecise
96 /// license names and "/" used instead of "OR" in exprssions.
97 ///
98 /// ```
99 /// spdx::Expression::parse_mode(
100 /// "mit/Apache-2.0 WITH LLVM-exception",
101 /// spdx::ParseMode::LAX
102 /// ).unwrap();
103 /// ```
104 pub fn parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError> {
105 // Operator precedence in SPDX 2.1
106 // +
107 // WITH
108 // AND
109 // OR
110 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
111 enum Op {
112 //Plus,
113 //With,
114 And,
115 Or,
116 Open,
117 }
118
119 struct OpAndSpan {
120 op: Op,
121 span: std::ops::Range<usize>,
122 }
123
124 let lexer = Lexer::new_mode(original, mode);
125 let mut op_stack = SmallVec::<[OpAndSpan; 3]>::new();
126 let mut expr_queue = SmallVec::<[ExprNode; 5]>::new();
127
128 // Keep track of the last token to simplify validation of the token stream
129 let mut last_token: Option<Token<'_>> = None;
130
131 let apply_op = |op: OpAndSpan, q: &mut SmallVec<[ExprNode; 5]>| {
132 let op = match op.op {
133 Op::And => Operator::And,
134 Op::Or => Operator::Or,
135 Op::Open => unreachable!(),
136 };
137
138 q.push(ExprNode::Op(op));
139 Ok(())
140 };
141
142 let make_err_for_token = |last_token: Option<Token<'_>>, span: std::ops::Range<usize>| {
143 let expected: &[&str] = match last_token {
144 None | Some(Token::And | Token::Or | Token::OpenParen) => &["<license>", "("],
145 Some(Token::CloseParen) => &["AND", "OR"],
146 Some(Token::Exception(_)) => &["AND", "OR", ")"],
147 Some(Token::Spdx(_)) => &["AND", "OR", "WITH", ")", "+"],
148 Some(Token::LicenseRef { .. } | Token::Plus) => &["AND", "OR", "WITH", ")"],
149 Some(Token::With) => &["<exception>"],
150 };
151
152 Err(ParseError {
153 original: original.to_owned(),
154 span,
155 reason: Reason::Unexpected(expected),
156 })
157 };
158
159 // Basic implementation of the https://en.wikipedia.org/wiki/Shunting-yard_algorithm
160 'outer: for tok in lexer {
161 let lt = tok?;
162 match &lt.token {
163 Token::Spdx(id) => match last_token {
164 None | Some(Token::And | Token::Or | Token::OpenParen) => {
165 expr_queue.push(ExprNode::Req(ExpressionReq {
166 req: LicenseReq::from(*id),
167 span: lt.span.start as u32..lt.span.end as u32,
168 }));
169 }
170 _ => return make_err_for_token(last_token, lt.span),
171 },
172 Token::LicenseRef { doc_ref, lic_ref } => match last_token {
173 None | Some(Token::And | Token::Or | Token::OpenParen) => {
174 expr_queue.push(ExprNode::Req(ExpressionReq {
175 req: LicenseReq {
176 license: LicenseItem::Other {
177 doc_ref: doc_ref.map(String::from),
178 lic_ref: String::from(*lic_ref),
179 },
180 exception: None,
181 },
182 span: lt.span.start as u32..lt.span.end as u32,
183 }));
184 }
185 _ => return make_err_for_token(last_token, lt.span),
186 },
187 Token::Plus => match last_token {
188 Some(Token::Spdx(_)) => match expr_queue.last_mut().unwrap() {
189 ExprNode::Req(ExpressionReq {
190 req:
191 LicenseReq {
192 license: LicenseItem::Spdx { or_later, id },
193 ..
194 },
195 ..
196 }) => {
197 // Handle GNU licenses differently, as they should *NOT* be used with the `+`
198 if !mode.allow_postfix_plus_on_gpl && id.is_gnu() {
199 return Err(ParseError {
200 original: original.to_owned(),
201 span: lt.span,
202 reason: Reason::GnuNoPlus,
203 });
204 }
205
206 *or_later = true;
207 }
208 _ => unreachable!(),
209 },
210 _ => return make_err_for_token(last_token, lt.span),
211 },
212 Token::With => match last_token {
213 Some(Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus) => {}
214 _ => return make_err_for_token(last_token, lt.span),
215 },
216 Token::Or | Token::And => match last_token {
217 Some(
218 Token::Spdx(_)
219 | Token::LicenseRef { .. }
220 | Token::CloseParen
221 | Token::Exception(_)
222 | Token::Plus,
223 ) => {
224 let new_op = match lt.token {
225 Token::Or => Op::Or,
226 Token::And => Op::And,
227 _ => unreachable!(),
228 };
229
230 while let Some(op) = op_stack.last() {
231 match &op.op {
232 Op::Open => break,
233 top => {
234 if *top < new_op {
235 let top = op_stack.pop().unwrap();
236
237 match top.op {
238 Op::And | Op::Or => apply_op(top, &mut expr_queue)?,
239 Op::Open => unreachable!(),
240 }
241 } else {
242 break;
243 }
244 }
245 }
246 }
247
248 op_stack.push(OpAndSpan {
249 op: new_op,
250 span: lt.span,
251 });
252 }
253 _ => return make_err_for_token(last_token, lt.span),
254 },
255 Token::OpenParen => match last_token {
256 None | Some(Token::And | Token::Or | Token::OpenParen) => {
257 op_stack.push(OpAndSpan {
258 op: Op::Open,
259 span: lt.span,
260 });
261 }
262 _ => return make_err_for_token(last_token, lt.span),
263 },
264 Token::CloseParen => {
265 match last_token {
266 Some(
267 Token::Spdx(_)
268 | Token::LicenseRef { .. }
269 | Token::Plus
270 | Token::Exception(_)
271 | Token::CloseParen,
272 ) => {
273 while let Some(top) = op_stack.pop() {
274 match top.op {
275 Op::And | Op::Or => apply_op(top, &mut expr_queue)?,
276 Op::Open => {
277 // This is the only place we go back to the top of the outer loop,
278 // so make sure we correctly record this token
279 last_token = Some(Token::CloseParen);
280 continue 'outer;
281 }
282 }
283 }
284
285 // We didn't have an opening parentheses if we get here
286 return Err(ParseError {
287 original: original.to_owned(),
288 span: lt.span,
289 reason: Reason::UnopenedParens,
290 });
291 }
292 _ => return make_err_for_token(last_token, lt.span),
293 }
294 }
295 Token::Exception(exc) => match last_token {
296 Some(Token::With) => match expr_queue.last_mut() {
297 Some(ExprNode::Req(lic)) => {
298 lic.req.exception = Some(*exc);
299 }
300 _ => unreachable!(),
301 },
302 _ => return make_err_for_token(last_token, lt.span),
303 },
304 }
305
306 last_token = Some(lt.token);
307 }
308
309 // Validate that the terminating token is valid
310 match last_token {
311 Some(
312 Token::Spdx(_)
313 | Token::LicenseRef { .. }
314 | Token::Exception(_)
315 | Token::CloseParen
316 | Token::Plus,
317 ) => {}
318 // We have to have at least one valid license requirement
319 None => {
320 return Err(ParseError {
321 original: original.to_owned(),
322 span: 0..original.len(),
323 reason: Reason::Empty,
324 });
325 }
326 Some(_) => return make_err_for_token(last_token, original.len()..original.len()),
327 }
328
329 while let Some(top) = op_stack.pop() {
330 match top.op {
331 Op::And | Op::Or => apply_op(top, &mut expr_queue)?,
332 Op::Open => {
333 return Err(ParseError {
334 original: original.to_owned(),
335 span: top.span,
336 reason: Reason::UnclosedParens,
337 });
338 }
339 }
340 }
341
342 // TODO: Investigate using https://github.com/oli-obk/quine-mc_cluskey to simplify
343 // expressions, but not really critical. Just cool.
344
345 Ok(Expression {
346 original: original.to_owned(),
347 expr: expr_queue,
348 })
349 }
350}
351