1 | use crate::{ |
2 | error::{ParseError, Reason}, |
3 | expression::{ExprNode, Expression, ExpressionReq, Operator}, |
4 | lexer::{Lexer, Token}, |
5 | LicenseItem, LicenseReq, ParseMode, |
6 | }; |
7 | use smallvec::SmallVec; |
8 | |
9 | impl Expression { |
10 | /// Given a license expression, attempts to parse and validate it as a valid |
11 | /// SPDX expression. Uses `ParseMode::Strict`. |
12 | /// |
13 | /// The validation can fail for many reasons: |
14 | /// * The expression contains invalid characters |
15 | /// * An unknown/invalid license or exception identifier was found. Only |
16 | /// [SPDX short identifiers](https://spdx.org/ids) are allowed |
17 | /// * The expression contained unbalanced parentheses |
18 | /// * A license or exception immediately follows another license or exception, without |
19 | /// a valid AND, OR, or WITH operator separating them |
20 | /// * An AND, OR, or WITH doesn't have a license or `)` preceding it |
21 | /// |
22 | /// ``` |
23 | /// spdx::Expression::parse("MIT OR Apache-2.0 WITH LLVM-exception" ).unwrap(); |
24 | /// ``` |
25 | pub fn parse(original: &str) -> Result<Self, ParseError> { |
26 | Self::parse_mode(original, ParseMode::STRICT) |
27 | } |
28 | |
29 | /// Canonicalizes the input expression into a form that can be parsed with |
30 | /// [`ParseMode::STRICT`] |
31 | /// |
32 | /// ## Transforms |
33 | /// |
34 | /// 1. '/' is replaced with ' OR ' |
35 | /// 1. Lower-cased operators ('or', 'and', 'with') are upper-cased |
36 | /// 1. '+' is tranformed to `-or-later` for GNU licenses |
37 | /// 1. Invalid/imprecise license identifiers (eg. `apache2`) are replaced |
38 | /// with their valid identifiers |
39 | /// |
40 | /// If the provided expression is not modified then `None` is returned |
41 | /// |
42 | /// Note that this only does fixup of otherwise valid expressions, passing |
43 | /// the resulting string to [`Expression::parse`] can still result in |
44 | /// additional parse errors, eg. unbalanced parentheses |
45 | /// |
46 | /// ``` |
47 | /// assert_eq!(spdx::Expression::canonicalize("apache with LLVM-exception/gpl-3.0+" ).unwrap().unwrap(), "Apache-2.0 WITH LLVM-exception OR GPL-3.0-or-later" ); |
48 | /// ``` |
49 | pub fn canonicalize(original: &str) -> Result<Option<String>, ParseError> { |
50 | let mut can = String::with_capacity(original.len()); |
51 | |
52 | let lexer = Lexer::new_mode(original, ParseMode::LAX); |
53 | |
54 | // Keep track if the last license id is a GNU license that uses the -or-later |
55 | // convention rather than the + like all other licenses |
56 | let mut last_is_gnu = false; |
57 | for tok in lexer { |
58 | let tok = tok?; |
59 | |
60 | match tok.token { |
61 | Token::Spdx(id) => { |
62 | last_is_gnu = id.is_gnu(); |
63 | can.push_str(id.name); |
64 | } |
65 | Token::And => can.push_str(" AND " ), |
66 | Token::Or => can.push_str(" OR " ), |
67 | Token::With => can.push_str(" WITH " ), |
68 | Token::Plus => { |
69 | if last_is_gnu { |
70 | can.push_str("-or-later" ); |
71 | } else { |
72 | can.push('+' ); |
73 | } |
74 | } |
75 | Token::OpenParen => can.push('(' ), |
76 | Token::CloseParen => can.push(')' ), |
77 | Token::Exception(exc) => can.push_str(exc.name), |
78 | Token::LicenseRef { doc_ref, lic_ref } => { |
79 | if let Some(dr) = doc_ref { |
80 | can.push_str("DocumentRef-" ); |
81 | can.push_str(dr); |
82 | can.push(':' ); |
83 | } |
84 | |
85 | can.push_str("LicenseRef-" ); |
86 | can.push_str(lic_ref); |
87 | } |
88 | } |
89 | } |
90 | |
91 | Ok((can != original).then_some(can)) |
92 | } |
93 | |
94 | /// Parses an expression with the specified `ParseMode`. With |
95 | /// `ParseMode::Lax` it permits some non-SPDX syntax, such as imprecise |
96 | /// license names and "/" used instead of "OR" in exprssions. |
97 | /// |
98 | /// ``` |
99 | /// spdx::Expression::parse_mode( |
100 | /// "mit/Apache-2.0 WITH LLVM-exception" , |
101 | /// spdx::ParseMode::LAX |
102 | /// ).unwrap(); |
103 | /// ``` |
104 | pub fn parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError> { |
105 | // Operator precedence in SPDX 2.1 |
106 | // + |
107 | // WITH |
108 | // AND |
109 | // OR |
110 | #[derive (Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] |
111 | enum Op { |
112 | //Plus, |
113 | //With, |
114 | And, |
115 | Or, |
116 | Open, |
117 | } |
118 | |
119 | struct OpAndSpan { |
120 | op: Op, |
121 | span: std::ops::Range<usize>, |
122 | } |
123 | |
124 | let lexer = Lexer::new_mode(original, mode); |
125 | let mut op_stack = SmallVec::<[OpAndSpan; 3]>::new(); |
126 | let mut expr_queue = SmallVec::<[ExprNode; 5]>::new(); |
127 | |
128 | // Keep track of the last token to simplify validation of the token stream |
129 | let mut last_token: Option<Token<'_>> = None; |
130 | |
131 | let apply_op = |op: OpAndSpan, q: &mut SmallVec<[ExprNode; 5]>| { |
132 | let op = match op.op { |
133 | Op::And => Operator::And, |
134 | Op::Or => Operator::Or, |
135 | Op::Open => unreachable!(), |
136 | }; |
137 | |
138 | q.push(ExprNode::Op(op)); |
139 | Ok(()) |
140 | }; |
141 | |
142 | let make_err_for_token = |last_token: Option<Token<'_>>, span: std::ops::Range<usize>| { |
143 | let expected: &[&str] = match last_token { |
144 | None | Some(Token::And | Token::Or | Token::OpenParen) => &["<license>" , "(" ], |
145 | Some(Token::CloseParen) => &["AND" , "OR" ], |
146 | Some(Token::Exception(_)) => &["AND" , "OR" , ")" ], |
147 | Some(Token::Spdx(_)) => &["AND" , "OR" , "WITH" , ")" , "+" ], |
148 | Some(Token::LicenseRef { .. } | Token::Plus) => &["AND" , "OR" , "WITH" , ")" ], |
149 | Some(Token::With) => &["<exception>" ], |
150 | }; |
151 | |
152 | Err(ParseError { |
153 | original: original.to_owned(), |
154 | span, |
155 | reason: Reason::Unexpected(expected), |
156 | }) |
157 | }; |
158 | |
159 | // Basic implementation of the https://en.wikipedia.org/wiki/Shunting-yard_algorithm |
160 | 'outer: for tok in lexer { |
161 | let lt = tok?; |
162 | match <.token { |
163 | Token::Spdx(id) => match last_token { |
164 | None | Some(Token::And | Token::Or | Token::OpenParen) => { |
165 | expr_queue.push(ExprNode::Req(ExpressionReq { |
166 | req: LicenseReq::from(*id), |
167 | span: lt.span.start as u32..lt.span.end as u32, |
168 | })); |
169 | } |
170 | _ => return make_err_for_token(last_token, lt.span), |
171 | }, |
172 | Token::LicenseRef { doc_ref, lic_ref } => match last_token { |
173 | None | Some(Token::And | Token::Or | Token::OpenParen) => { |
174 | expr_queue.push(ExprNode::Req(ExpressionReq { |
175 | req: LicenseReq { |
176 | license: LicenseItem::Other { |
177 | doc_ref: doc_ref.map(String::from), |
178 | lic_ref: String::from(*lic_ref), |
179 | }, |
180 | exception: None, |
181 | }, |
182 | span: lt.span.start as u32..lt.span.end as u32, |
183 | })); |
184 | } |
185 | _ => return make_err_for_token(last_token, lt.span), |
186 | }, |
187 | Token::Plus => match last_token { |
188 | Some(Token::Spdx(_)) => match expr_queue.last_mut().unwrap() { |
189 | ExprNode::Req(ExpressionReq { |
190 | req: |
191 | LicenseReq { |
192 | license: LicenseItem::Spdx { or_later, id }, |
193 | .. |
194 | }, |
195 | .. |
196 | }) => { |
197 | // Handle GNU licenses differently, as they should *NOT* be used with the `+` |
198 | if !mode.allow_postfix_plus_on_gpl && id.is_gnu() { |
199 | return Err(ParseError { |
200 | original: original.to_owned(), |
201 | span: lt.span, |
202 | reason: Reason::GnuNoPlus, |
203 | }); |
204 | } |
205 | |
206 | *or_later = true; |
207 | } |
208 | _ => unreachable!(), |
209 | }, |
210 | _ => return make_err_for_token(last_token, lt.span), |
211 | }, |
212 | Token::With => match last_token { |
213 | Some(Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus) => {} |
214 | _ => return make_err_for_token(last_token, lt.span), |
215 | }, |
216 | Token::Or | Token::And => match last_token { |
217 | Some( |
218 | Token::Spdx(_) |
219 | | Token::LicenseRef { .. } |
220 | | Token::CloseParen |
221 | | Token::Exception(_) |
222 | | Token::Plus, |
223 | ) => { |
224 | let new_op = match lt.token { |
225 | Token::Or => Op::Or, |
226 | Token::And => Op::And, |
227 | _ => unreachable!(), |
228 | }; |
229 | |
230 | while let Some(op) = op_stack.last() { |
231 | match &op.op { |
232 | Op::Open => break, |
233 | top => { |
234 | if *top < new_op { |
235 | let top = op_stack.pop().unwrap(); |
236 | |
237 | match top.op { |
238 | Op::And | Op::Or => apply_op(top, &mut expr_queue)?, |
239 | Op::Open => unreachable!(), |
240 | } |
241 | } else { |
242 | break; |
243 | } |
244 | } |
245 | } |
246 | } |
247 | |
248 | op_stack.push(OpAndSpan { |
249 | op: new_op, |
250 | span: lt.span, |
251 | }); |
252 | } |
253 | _ => return make_err_for_token(last_token, lt.span), |
254 | }, |
255 | Token::OpenParen => match last_token { |
256 | None | Some(Token::And | Token::Or | Token::OpenParen) => { |
257 | op_stack.push(OpAndSpan { |
258 | op: Op::Open, |
259 | span: lt.span, |
260 | }); |
261 | } |
262 | _ => return make_err_for_token(last_token, lt.span), |
263 | }, |
264 | Token::CloseParen => { |
265 | match last_token { |
266 | Some( |
267 | Token::Spdx(_) |
268 | | Token::LicenseRef { .. } |
269 | | Token::Plus |
270 | | Token::Exception(_) |
271 | | Token::CloseParen, |
272 | ) => { |
273 | while let Some(top) = op_stack.pop() { |
274 | match top.op { |
275 | Op::And | Op::Or => apply_op(top, &mut expr_queue)?, |
276 | Op::Open => { |
277 | // This is the only place we go back to the top of the outer loop, |
278 | // so make sure we correctly record this token |
279 | last_token = Some(Token::CloseParen); |
280 | continue 'outer; |
281 | } |
282 | } |
283 | } |
284 | |
285 | // We didn't have an opening parentheses if we get here |
286 | return Err(ParseError { |
287 | original: original.to_owned(), |
288 | span: lt.span, |
289 | reason: Reason::UnopenedParens, |
290 | }); |
291 | } |
292 | _ => return make_err_for_token(last_token, lt.span), |
293 | } |
294 | } |
295 | Token::Exception(exc) => match last_token { |
296 | Some(Token::With) => match expr_queue.last_mut() { |
297 | Some(ExprNode::Req(lic)) => { |
298 | lic.req.exception = Some(*exc); |
299 | } |
300 | _ => unreachable!(), |
301 | }, |
302 | _ => return make_err_for_token(last_token, lt.span), |
303 | }, |
304 | } |
305 | |
306 | last_token = Some(lt.token); |
307 | } |
308 | |
309 | // Validate that the terminating token is valid |
310 | match last_token { |
311 | Some( |
312 | Token::Spdx(_) |
313 | | Token::LicenseRef { .. } |
314 | | Token::Exception(_) |
315 | | Token::CloseParen |
316 | | Token::Plus, |
317 | ) => {} |
318 | // We have to have at least one valid license requirement |
319 | None => { |
320 | return Err(ParseError { |
321 | original: original.to_owned(), |
322 | span: 0..original.len(), |
323 | reason: Reason::Empty, |
324 | }); |
325 | } |
326 | Some(_) => return make_err_for_token(last_token, original.len()..original.len()), |
327 | } |
328 | |
329 | while let Some(top) = op_stack.pop() { |
330 | match top.op { |
331 | Op::And | Op::Or => apply_op(top, &mut expr_queue)?, |
332 | Op::Open => { |
333 | return Err(ParseError { |
334 | original: original.to_owned(), |
335 | span: top.span, |
336 | reason: Reason::UnclosedParens, |
337 | }); |
338 | } |
339 | } |
340 | } |
341 | |
342 | // TODO: Investigate using https://github.com/oli-obk/quine-mc_cluskey to simplify |
343 | // expressions, but not really critical. Just cool. |
344 | |
345 | Ok(Expression { |
346 | original: original.to_owned(), |
347 | expr: expr_queue, |
348 | }) |
349 | } |
350 | } |
351 | |