| 1 | use crate::{ |
| 2 | error::{ParseError, Reason}, |
| 3 | expression::{ExprNode, Expression, ExpressionReq, Operator}, |
| 4 | lexer::{Lexer, Token}, |
| 5 | LicenseItem, LicenseReq, ParseMode, |
| 6 | }; |
| 7 | use smallvec::SmallVec; |
| 8 | |
| 9 | impl Expression { |
| 10 | /// Given a license expression, attempts to parse and validate it as a valid |
| 11 | /// SPDX expression. Uses `ParseMode::Strict`. |
| 12 | /// |
| 13 | /// The validation can fail for many reasons: |
| 14 | /// * The expression contains invalid characters |
| 15 | /// * An unknown/invalid license or exception identifier was found. Only |
| 16 | /// [SPDX short identifiers](https://spdx.org/ids) are allowed |
| 17 | /// * The expression contained unbalanced parentheses |
| 18 | /// * A license or exception immediately follows another license or exception, without |
| 19 | /// a valid AND, OR, or WITH operator separating them |
| 20 | /// * An AND, OR, or WITH doesn't have a license or `)` preceding it |
| 21 | /// |
| 22 | /// ``` |
| 23 | /// spdx::Expression::parse("MIT OR Apache-2.0 WITH LLVM-exception" ).unwrap(); |
| 24 | /// ``` |
| 25 | pub fn parse(original: &str) -> Result<Self, ParseError> { |
| 26 | Self::parse_mode(original, ParseMode::STRICT) |
| 27 | } |
| 28 | |
| 29 | /// Canonicalizes the input expression into a form that can be parsed with |
| 30 | /// [`ParseMode::STRICT`] |
| 31 | /// |
| 32 | /// ## Transforms |
| 33 | /// |
| 34 | /// 1. '/' is replaced with ' OR ' |
| 35 | /// 1. Lower-cased operators ('or', 'and', 'with') are upper-cased |
| 36 | /// 1. '+' is tranformed to `-or-later` for GNU licenses |
| 37 | /// 1. Invalid/imprecise license identifiers (eg. `apache2`) are replaced |
| 38 | /// with their valid identifiers |
| 39 | /// |
| 40 | /// If the provided expression is not modified then `None` is returned |
| 41 | /// |
| 42 | /// Note that this only does fixup of otherwise valid expressions, passing |
| 43 | /// the resulting string to [`Expression::parse`] can still result in |
| 44 | /// additional parse errors, eg. unbalanced parentheses |
| 45 | /// |
| 46 | /// ``` |
| 47 | /// assert_eq!(spdx::Expression::canonicalize("apache with LLVM-exception/gpl-3.0+" ).unwrap().unwrap(), "Apache-2.0 WITH LLVM-exception OR GPL-3.0-or-later" ); |
| 48 | /// ``` |
| 49 | pub fn canonicalize(original: &str) -> Result<Option<String>, ParseError> { |
| 50 | let mut can = String::with_capacity(original.len()); |
| 51 | |
| 52 | let lexer = Lexer::new_mode(original, ParseMode::LAX); |
| 53 | |
| 54 | // Keep track if the last license id is a GNU license that uses the -or-later |
| 55 | // convention rather than the + like all other licenses |
| 56 | let mut last_is_gnu = false; |
| 57 | for tok in lexer { |
| 58 | let tok = tok?; |
| 59 | |
| 60 | match tok.token { |
| 61 | Token::Spdx(id) => { |
| 62 | last_is_gnu = id.is_gnu(); |
| 63 | can.push_str(id.name); |
| 64 | } |
| 65 | Token::And => can.push_str(" AND " ), |
| 66 | Token::Or => can.push_str(" OR " ), |
| 67 | Token::With => can.push_str(" WITH " ), |
| 68 | Token::Plus => { |
| 69 | if last_is_gnu { |
| 70 | can.push_str("-or-later" ); |
| 71 | } else { |
| 72 | can.push('+' ); |
| 73 | } |
| 74 | } |
| 75 | Token::OpenParen => can.push('(' ), |
| 76 | Token::CloseParen => can.push(')' ), |
| 77 | Token::Exception(exc) => can.push_str(exc.name), |
| 78 | Token::LicenseRef { doc_ref, lic_ref } => { |
| 79 | if let Some(dr) = doc_ref { |
| 80 | can.push_str("DocumentRef-" ); |
| 81 | can.push_str(dr); |
| 82 | can.push(':' ); |
| 83 | } |
| 84 | |
| 85 | can.push_str("LicenseRef-" ); |
| 86 | can.push_str(lic_ref); |
| 87 | } |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | Ok((can != original).then_some(can)) |
| 92 | } |
| 93 | |
| 94 | /// Parses an expression with the specified `ParseMode`. With |
| 95 | /// `ParseMode::Lax` it permits some non-SPDX syntax, such as imprecise |
| 96 | /// license names and "/" used instead of "OR" in exprssions. |
| 97 | /// |
| 98 | /// ``` |
| 99 | /// spdx::Expression::parse_mode( |
| 100 | /// "mit/Apache-2.0 WITH LLVM-exception" , |
| 101 | /// spdx::ParseMode::LAX |
| 102 | /// ).unwrap(); |
| 103 | /// ``` |
| 104 | pub fn parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError> { |
| 105 | // Operator precedence in SPDX 2.1 |
| 106 | // + |
| 107 | // WITH |
| 108 | // AND |
| 109 | // OR |
| 110 | #[derive (Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] |
| 111 | enum Op { |
| 112 | //Plus, |
| 113 | //With, |
| 114 | And, |
| 115 | Or, |
| 116 | Open, |
| 117 | } |
| 118 | |
| 119 | struct OpAndSpan { |
| 120 | op: Op, |
| 121 | span: std::ops::Range<usize>, |
| 122 | } |
| 123 | |
| 124 | let lexer = Lexer::new_mode(original, mode); |
| 125 | let mut op_stack = SmallVec::<[OpAndSpan; 3]>::new(); |
| 126 | let mut expr_queue = SmallVec::<[ExprNode; 5]>::new(); |
| 127 | |
| 128 | // Keep track of the last token to simplify validation of the token stream |
| 129 | let mut last_token: Option<Token<'_>> = None; |
| 130 | |
| 131 | let apply_op = |op: OpAndSpan, q: &mut SmallVec<[ExprNode; 5]>| { |
| 132 | let op = match op.op { |
| 133 | Op::And => Operator::And, |
| 134 | Op::Or => Operator::Or, |
| 135 | Op::Open => unreachable!(), |
| 136 | }; |
| 137 | |
| 138 | q.push(ExprNode::Op(op)); |
| 139 | Ok(()) |
| 140 | }; |
| 141 | |
| 142 | let make_err_for_token = |last_token: Option<Token<'_>>, span: std::ops::Range<usize>| { |
| 143 | let expected: &[&str] = match last_token { |
| 144 | None | Some(Token::And | Token::Or | Token::OpenParen) => &["<license>" , "(" ], |
| 145 | Some(Token::CloseParen) => &["AND" , "OR" ], |
| 146 | Some(Token::Exception(_)) => &["AND" , "OR" , ")" ], |
| 147 | Some(Token::Spdx(_)) => &["AND" , "OR" , "WITH" , ")" , "+" ], |
| 148 | Some(Token::LicenseRef { .. } | Token::Plus) => &["AND" , "OR" , "WITH" , ")" ], |
| 149 | Some(Token::With) => &["<exception>" ], |
| 150 | }; |
| 151 | |
| 152 | Err(ParseError { |
| 153 | original: original.to_owned(), |
| 154 | span, |
| 155 | reason: Reason::Unexpected(expected), |
| 156 | }) |
| 157 | }; |
| 158 | |
| 159 | // Basic implementation of the https://en.wikipedia.org/wiki/Shunting-yard_algorithm |
| 160 | 'outer: for tok in lexer { |
| 161 | let lt = tok?; |
| 162 | match <.token { |
| 163 | Token::Spdx(id) => match last_token { |
| 164 | None | Some(Token::And | Token::Or | Token::OpenParen) => { |
| 165 | expr_queue.push(ExprNode::Req(ExpressionReq { |
| 166 | req: LicenseReq::from(*id), |
| 167 | span: lt.span.start as u32..lt.span.end as u32, |
| 168 | })); |
| 169 | } |
| 170 | _ => return make_err_for_token(last_token, lt.span), |
| 171 | }, |
| 172 | Token::LicenseRef { doc_ref, lic_ref } => match last_token { |
| 173 | None | Some(Token::And | Token::Or | Token::OpenParen) => { |
| 174 | expr_queue.push(ExprNode::Req(ExpressionReq { |
| 175 | req: LicenseReq { |
| 176 | license: LicenseItem::Other { |
| 177 | doc_ref: doc_ref.map(String::from), |
| 178 | lic_ref: String::from(*lic_ref), |
| 179 | }, |
| 180 | exception: None, |
| 181 | }, |
| 182 | span: lt.span.start as u32..lt.span.end as u32, |
| 183 | })); |
| 184 | } |
| 185 | _ => return make_err_for_token(last_token, lt.span), |
| 186 | }, |
| 187 | Token::Plus => match last_token { |
| 188 | Some(Token::Spdx(_)) => match expr_queue.last_mut().unwrap() { |
| 189 | ExprNode::Req(ExpressionReq { |
| 190 | req: |
| 191 | LicenseReq { |
| 192 | license: LicenseItem::Spdx { or_later, id }, |
| 193 | .. |
| 194 | }, |
| 195 | .. |
| 196 | }) => { |
| 197 | // Handle GNU licenses differently, as they should *NOT* be used with the `+` |
| 198 | if !mode.allow_postfix_plus_on_gpl && id.is_gnu() { |
| 199 | return Err(ParseError { |
| 200 | original: original.to_owned(), |
| 201 | span: lt.span, |
| 202 | reason: Reason::GnuNoPlus, |
| 203 | }); |
| 204 | } |
| 205 | |
| 206 | *or_later = true; |
| 207 | } |
| 208 | _ => unreachable!(), |
| 209 | }, |
| 210 | _ => return make_err_for_token(last_token, lt.span), |
| 211 | }, |
| 212 | Token::With => match last_token { |
| 213 | Some(Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus) => {} |
| 214 | _ => return make_err_for_token(last_token, lt.span), |
| 215 | }, |
| 216 | Token::Or | Token::And => match last_token { |
| 217 | Some( |
| 218 | Token::Spdx(_) |
| 219 | | Token::LicenseRef { .. } |
| 220 | | Token::CloseParen |
| 221 | | Token::Exception(_) |
| 222 | | Token::Plus, |
| 223 | ) => { |
| 224 | let new_op = match lt.token { |
| 225 | Token::Or => Op::Or, |
| 226 | Token::And => Op::And, |
| 227 | _ => unreachable!(), |
| 228 | }; |
| 229 | |
| 230 | while let Some(op) = op_stack.last() { |
| 231 | match &op.op { |
| 232 | Op::Open => break, |
| 233 | top => { |
| 234 | if *top < new_op { |
| 235 | let top = op_stack.pop().unwrap(); |
| 236 | |
| 237 | match top.op { |
| 238 | Op::And | Op::Or => apply_op(top, &mut expr_queue)?, |
| 239 | Op::Open => unreachable!(), |
| 240 | } |
| 241 | } else { |
| 242 | break; |
| 243 | } |
| 244 | } |
| 245 | } |
| 246 | } |
| 247 | |
| 248 | op_stack.push(OpAndSpan { |
| 249 | op: new_op, |
| 250 | span: lt.span, |
| 251 | }); |
| 252 | } |
| 253 | _ => return make_err_for_token(last_token, lt.span), |
| 254 | }, |
| 255 | Token::OpenParen => match last_token { |
| 256 | None | Some(Token::And | Token::Or | Token::OpenParen) => { |
| 257 | op_stack.push(OpAndSpan { |
| 258 | op: Op::Open, |
| 259 | span: lt.span, |
| 260 | }); |
| 261 | } |
| 262 | _ => return make_err_for_token(last_token, lt.span), |
| 263 | }, |
| 264 | Token::CloseParen => { |
| 265 | match last_token { |
| 266 | Some( |
| 267 | Token::Spdx(_) |
| 268 | | Token::LicenseRef { .. } |
| 269 | | Token::Plus |
| 270 | | Token::Exception(_) |
| 271 | | Token::CloseParen, |
| 272 | ) => { |
| 273 | while let Some(top) = op_stack.pop() { |
| 274 | match top.op { |
| 275 | Op::And | Op::Or => apply_op(top, &mut expr_queue)?, |
| 276 | Op::Open => { |
| 277 | // This is the only place we go back to the top of the outer loop, |
| 278 | // so make sure we correctly record this token |
| 279 | last_token = Some(Token::CloseParen); |
| 280 | continue 'outer; |
| 281 | } |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | // We didn't have an opening parentheses if we get here |
| 286 | return Err(ParseError { |
| 287 | original: original.to_owned(), |
| 288 | span: lt.span, |
| 289 | reason: Reason::UnopenedParens, |
| 290 | }); |
| 291 | } |
| 292 | _ => return make_err_for_token(last_token, lt.span), |
| 293 | } |
| 294 | } |
| 295 | Token::Exception(exc) => match last_token { |
| 296 | Some(Token::With) => match expr_queue.last_mut() { |
| 297 | Some(ExprNode::Req(lic)) => { |
| 298 | lic.req.exception = Some(*exc); |
| 299 | } |
| 300 | _ => unreachable!(), |
| 301 | }, |
| 302 | _ => return make_err_for_token(last_token, lt.span), |
| 303 | }, |
| 304 | } |
| 305 | |
| 306 | last_token = Some(lt.token); |
| 307 | } |
| 308 | |
| 309 | // Validate that the terminating token is valid |
| 310 | match last_token { |
| 311 | Some( |
| 312 | Token::Spdx(_) |
| 313 | | Token::LicenseRef { .. } |
| 314 | | Token::Exception(_) |
| 315 | | Token::CloseParen |
| 316 | | Token::Plus, |
| 317 | ) => {} |
| 318 | // We have to have at least one valid license requirement |
| 319 | None => { |
| 320 | return Err(ParseError { |
| 321 | original: original.to_owned(), |
| 322 | span: 0..original.len(), |
| 323 | reason: Reason::Empty, |
| 324 | }); |
| 325 | } |
| 326 | Some(_) => return make_err_for_token(last_token, original.len()..original.len()), |
| 327 | } |
| 328 | |
| 329 | while let Some(top) = op_stack.pop() { |
| 330 | match top.op { |
| 331 | Op::And | Op::Or => apply_op(top, &mut expr_queue)?, |
| 332 | Op::Open => { |
| 333 | return Err(ParseError { |
| 334 | original: original.to_owned(), |
| 335 | span: top.span, |
| 336 | reason: Reason::UnclosedParens, |
| 337 | }); |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | // TODO: Investigate using https://github.com/oli-obk/quine-mc_cluskey to simplify |
| 343 | // expressions, but not really critical. Just cool. |
| 344 | |
| 345 | Ok(Expression { |
| 346 | original: original.to_owned(), |
| 347 | expr: expr_queue, |
| 348 | }) |
| 349 | } |
| 350 | } |
| 351 | |