use crate::error::{ParseError, Reason};

/// A single token in a cfg expression
/// <https://doc.rust-lang.org/reference/conditional-compilation.html>
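///
/// For example, `all(unix, key = "value")` lexes to `All`, `OpenParen`,
/// `Key("unix")`, `Comma`, `Key("key")`, `Equals`, `Value("value")`,
/// `CloseParen`.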
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token<'a> {
    /// A single contiguous term
    Key(&'a str),
    /// A single contiguous value, without its surrounding quotes
    Value(&'a str),
    /// A `=`, joining a key and a value
    Equals,
    /// Beginning of an `all()` predicate list
    All,
    /// Beginning of an `any()` predicate list
    Any,
    /// Beginning of a `not()` predicate
    Not,
    /// A `(` for starting a predicate list
    OpenParen,
    /// A `)` for ending a predicate list
    CloseParen,
    /// A `,` for separating predicates in a predicate list
    Comma,
}

impl std::fmt::Display for Token<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(self, f)
    }
}

impl Token<'_> {
    #[inline]
    fn len(&self) -> usize {
        match self {
            Token::Key(s) => s.len(),
            // +2 to account for the quotes stripped from the value
            Token::Value(s) => s.len() + 2,
            Token::Equals | Token::OpenParen | Token::CloseParen | Token::Comma => 1,
            Token::All | Token::Any | Token::Not => 3,
        }
    }
}

/// Allows iteration through a cfg expression, yielding
/// a token or a `ParseError`.
///
/// Prefer to use `Expression::parse` rather than directly
/// using the lexer.
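///
/// A sketch of direct usage (the `cfg_expr::expr::lexer` import path is an
/// assumption about how this module is re-exported, hence `ignore`):
///
/// ```ignore
/// use cfg_expr::expr::lexer::{Lexer, Token};
///
/// let mut lexer = Lexer::new(r#"key = "value""#);
/// assert_eq!(lexer.next().unwrap().unwrap().token, Token::Key("key"));
/// assert_eq!(lexer.next().unwrap().unwrap().token, Token::Equals);
/// assert_eq!(lexer.next().unwrap().unwrap().token, Token::Value("value"));
/// assert!(lexer.next().is_none());
/// ```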
pub struct Lexer<'a> {
    pub(super) inner: &'a str,
    original: &'a str,
    offset: usize,
}

impl<'a> Lexer<'a> {
    /// Creates a `Lexer` over a cfg expression; it can either be
    /// a raw expression, eg `key`, or in attribute form, eg `cfg(key)`
    pub fn new(text: &'a str) -> Self {
        // Strip the attribute wrapper so only the inner expression is lexed
        let text = if text.starts_with("cfg(") && text.ends_with(')') {
            &text[4..text.len() - 1]
        } else {
            text
        };

        Self {
            inner: text,
            original: text,
            offset: 0,
        }
    }
}

/// A wrapper around a particular token that includes the span of the characters
/// in the original string, for diagnostic purposes
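///
/// For example, lexing `key = "value"` yields `Value("value")` with span
/// `6..13`, which covers the surrounding quotes in the original text.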
#[derive(Debug)]
pub struct LexerToken<'a> {
    /// The token that was lexed
    pub token: Token<'a>,
    /// The range of the token characters in the original cfg expression
    pub span: std::ops::Range<usize>,
}

impl<'a> Iterator for Lexer<'a> {
    type Item = Result<LexerToken<'a>, ParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        // Jump over any whitespace, updating `self.inner` and `self.offset` appropriately
        let non_whitespace_index = self
            .inner
            .find(|c: char| !c.is_whitespace())
            .unwrap_or(self.inner.len());

        self.inner = &self.inner[non_whitespace_index..];
        self.offset += non_whitespace_index;

        #[inline]
        fn is_ident_start(ch: char) -> bool {
            ch == '_' || ch.is_ascii_lowercase() || ch.is_ascii_uppercase()
        }

        #[inline]
        fn is_ident_rest(ch: char) -> bool {
            is_ident_start(ch) || ch.is_ascii_digit()
        }

        match self.inner.chars().next() {
            None => None,
            Some('=') => Some(Ok(Token::Equals)),
            Some('(') => Some(Ok(Token::OpenParen)),
            Some(')') => Some(Ok(Token::CloseParen)),
            Some(',') => Some(Ok(Token::Comma)),
            Some(c) => {
                if c == '"' {
                    match self.inner[1..].find('"') {
                        Some(ind) => Some(Ok(Token::Value(&self.inner[1..=ind]))),
                        None => Some(Err(ParseError {
                            original: self.original.to_owned(),
                            span: self.offset..self.original.len(),
                            reason: Reason::UnclosedQuotes,
                        })),
                    }
                } else if is_ident_start(c) {
                    // `ind` is relative to `self.inner[1..]`, so `..=ind` ends
                    // just before the first non-identifier character
                    let substr = match self.inner[1..].find(|c: char| !is_ident_rest(c)) {
                        Some(ind) => &self.inner[..=ind],
                        None => self.inner,
                    };

                    match substr {
                        "all" => Some(Ok(Token::All)),
                        "any" => Some(Ok(Token::Any)),
                        "not" => Some(Ok(Token::Not)),
                        other => Some(Ok(Token::Key(other))),
                    }
                } else {
                    // clippy tries to help here, but we need
                    // a Range here, not a RangeInclusive
                    #[allow(clippy::range_plus_one)]
                    Some(Err(ParseError {
                        original: self.original.to_owned(),
                        span: self.offset..self.offset + 1,
                        reason: Reason::Unexpected(&["<key>", "all", "any", "not"]),
                    }))
                }
            }
        }
        .map(|tok| {
            tok.map(|tok| {
                let len = tok.len();

                let start = self.offset;
                self.inner = &self.inner[len..];
                self.offset += len;

                LexerToken {
                    token: tok,
                    span: start..self.offset,
                }
            })
        })
    }
}
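
// A minimal sketch of tests exercising the lexer directly; it assumes
// `ParseError` implements `Debug` (required by `unwrap`) and that its
// `span` and `reason` fields are visible here, as their construction in
// `next` above implies.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lexes_nested_expression() {
        let tokens: Vec<_> = Lexer::new(r#"cfg(all(unix, target_os = "linux"))"#)
            .map(|lt| lt.unwrap())
            .collect();

        // `Lexer::new` strips the `cfg(` prefix and trailing `)`, so spans
        // are relative to `all(unix, target_os = "linux")`
        assert_eq!(tokens.len(), 8);
        assert_eq!(tokens[0].token, Token::All);
        assert_eq!(tokens[0].span, 0..3);
        assert_eq!(tokens[1].token, Token::OpenParen);
        assert_eq!(tokens[2].token, Token::Key("unix"));
        assert_eq!(tokens[3].token, Token::Comma);
        assert_eq!(tokens[4].token, Token::Key("target_os"));
        assert_eq!(tokens[5].token, Token::Equals);
        assert_eq!(tokens[6].token, Token::Value("linux"));
        assert_eq!(tokens[6].span, 22..29);
        assert_eq!(tokens[7].token, Token::CloseParen);
    }

    #[test]
    fn reports_unclosed_quotes() {
        let err = Lexer::new(r#"key = "unterminated"#)
            .find_map(Result::err)
            .unwrap();

        assert!(matches!(err.reason, Reason::UnclosedQuotes));
        // The error span runs from the opening quote to the end of the input
        assert_eq!(err.span, 6..19);
    }
}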