1 | use crate::error::{ParseError, Reason}; |
2 | |
/// A single token in a cfg expression
/// <https://doc.rust-lang.org/reference/conditional-compilation.html>
#[derive (Clone, Debug, PartialEq, Eq)]
pub enum Token<'a> {
    /// A single contiguous term, eg. the `key` in `key = "value"`
    Key(&'a str),
    /// A single contiguous value, without its surrounding quotes
    Value(&'a str),
    /// A '=', joining a key and a value
    Equals,
    /// Beginning of an `all()` predicate list
    All,
    /// Beginning of an `any()` predicate list
    Any,
    /// Beginning of a `not()` predicate
    Not,
    /// A `(` for starting a predicate list
    OpenParen,
    /// A `)` for ending a predicate list
    CloseParen,
    /// A `,` for separating predicates in a predicate list
    Comma,
}
26 | |
27 | impl std::fmt::Display for Token<'_> { |
28 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
29 | std::fmt::Debug::fmt(self, f) |
30 | } |
31 | } |
32 | |
33 | impl Token<'_> { |
34 | #[inline ] |
35 | fn len(&self) -> usize { |
36 | match self { |
37 | Token::Key(s: &&str) => s.len(), |
38 | Token::Value(s: &&str) => s.len() + 2, |
39 | Token::Equals | Token::OpenParen | Token::CloseParen | Token::Comma => 1, |
40 | Token::All | Token::Any | Token::Not => 3, |
41 | } |
42 | } |
43 | } |
44 | |
45 | /// Allows iteration through a cfg expression, yielding |
46 | /// a token or a `ParseError`. |
47 | /// |
48 | /// Prefer to use `Expression::parse` rather than directly |
49 | /// using the lexer |
/// Allows iteration through a cfg expression, yielding
/// a token or a `ParseError`.
///
/// Prefer to use `Expression::parse` rather than directly
/// using the lexer
pub struct Lexer<'a> {
    /// The not-yet-lexed remainder of the expression
    pub(super) inner: &'a str,
    /// The full expression (after any `cfg(...)` unwrapping), retained so
    /// errors can report the original text
    original: &'a str,
    /// Byte offset of `inner` within `original`, used to compute token spans
    offset: usize,
}
55 | |
56 | impl<'a> Lexer<'a> { |
57 | /// Creates a Lexer over a cfg expression, it can either be |
58 | /// a raw expression eg `key` or in attribute form, eg `cfg(key)` |
59 | pub fn new(text: &'a str) -> Self { |
60 | let text: &str = if text.starts_with("cfg(" ) && text.ends_with(')' ) { |
61 | &text[4..text.len() - 1] |
62 | } else { |
63 | text |
64 | }; |
65 | |
66 | Self { |
67 | inner: text, |
68 | original: text, |
69 | offset: 0, |
70 | } |
71 | } |
72 | } |
73 | |
74 | /// A wrapper around a particular token that includes the span of the characters |
75 | /// in the original string, for diagnostic purposes |
76 | #[derive (Debug)] |
77 | pub struct LexerToken<'a> { |
78 | /// The token that was lexed |
79 | pub token: Token<'a>, |
80 | /// The range of the token characters in the original license expression |
81 | pub span: std::ops::Range<usize>, |
82 | } |
83 | |
impl<'a> Iterator for Lexer<'a> {
    type Item = Result<LexerToken<'a>, ParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        // Jump over any whitespace, updating `self.inner` and `self.offset` appropriately
        let non_whitespace_index = match self.inner.find(|c: char| !c.is_whitespace()) {
            Some(idx) => idx,
            // All-whitespace remainder: consume it entirely so the match
            // below sees an empty string and ends iteration.
            None => self.inner.len(),
        };

        self.inner = &self.inner[non_whitespace_index..];
        self.offset += non_whitespace_index;

        // Identifiers start with an ASCII letter or `_`...
        #[inline ]
        fn is_ident_start(ch: char) -> bool {
            ch == '_' || ch.is_ascii_lowercase() || ch.is_ascii_uppercase()
        }

        // ...and may additionally contain ASCII digits after the first char.
        #[inline ]
        fn is_ident_rest(ch: char) -> bool {
            is_ident_start(ch) || ch.is_ascii_digit()
        }

        match self.inner.chars().next() {
            // Exhausted the input
            None => None,
            // Single-character punctuation tokens
            Some('=' ) => Some(Ok(Token::Equals)),
            Some('(' ) => Some(Ok(Token::OpenParen)),
            Some(')' ) => Some(Ok(Token::CloseParen)),
            Some(',' ) => Some(Ok(Token::Comma)),
            Some(c) => {
                if c == '"' {
                    // Quoted value: locate the closing quote. The index
                    // returned by `find` is relative to `inner[1..]`, so the
                    // closing quote sits at absolute index `1 + ind`, and
                    // `inner[1..=ind]` is exactly the text between the quotes.
                    match self.inner[1..].find('"' ) {
                        Some(ind) => Some(Ok(Token::Value(&self.inner[1..=ind]))),
                        // No closing quote: report a span from the opening
                        // quote to the end of the expression.
                        None => Some(Err(ParseError {
                            original: self.original.to_owned(),
                            span: self.offset..self.original.len(),
                            reason: Reason::UnclosedQuotes,
                        })),
                    }
                } else if is_ident_start(c) {
                    // Consume the identifier run. As above, `ind` is relative
                    // to `inner[1..]`, so the first non-identifier char is at
                    // absolute index `1 + ind` and `inner[..=ind]` covers the
                    // whole identifier before it.
                    let substr = match self.inner[1..].find(|c: char| !is_ident_rest(c)) {
                        Some(ind) => &self.inner[..=ind],
                        // Identifier runs to the end of the input
                        None => self.inner,
                    };

                    // The three predicate keywords are special-cased;
                    // anything else is a plain key.
                    match substr {
                        "all" => Some(Ok(Token::All)),
                        "any" => Some(Ok(Token::Any)),
                        "not" => Some(Ok(Token::Not)),
                        other => Some(Ok(Token::Key(other))),
                    }
                } else {
                    // clippy tries to help here, but we need
                    // a Range here, not a RangeInclusive<>
                    #[allow (clippy::range_plus_one)]
                    Some(Err(ParseError {
                        original: self.original.to_owned(),
                        span: self.offset..self.offset + 1,
                        reason: Reason::Unexpected(&["<key>" , "all" , "any" , "not" ]),
                    }))
                }
            }
        }
        // Attach the source span and advance past the token; errors flow
        // through the inner `map` untouched.
        .map(|tok| {
            tok.map(|tok| {
                let len = tok.len();

                let start = self.offset;
                // `Token::len` includes the surrounding quotes for `Value`,
                // so this also steps past the closing `"`.
                self.inner = &self.inner[len..];
                self.offset += len;

                LexerToken {
                    token: tok,
                    span: start..self.offset,
                }
            })
        })
    }
}
163 | |