1 | use crate::error::{ParseError, Reason}; |
2 | |
/// One lexical token of a cfg expression.
///
/// <https://doc.rust-lang.org/reference/conditional-compilation.html>
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token<'a> {
    /// A bare identifier term, eg `unix`
    Key(&'a str),
    /// A quoted string with the surrounding quotes stripped,
    /// eg the `linux` in `target_os = "linux"`
    Value(&'a str),
    /// The `=` joining a key and a value
    Equals,
    /// The `all` keyword beginning an all() predicate list
    All,
    /// The `any` keyword beginning an any() predicate list
    Any,
    /// The `not` keyword beginning a not() predicate
    Not,
    /// An opening `(` of a predicate list
    OpenParen,
    /// A closing `)` of a predicate list
    CloseParen,
    /// A `,` separating predicates inside a predicate list
    Comma,
}
26 | |
27 | impl<'a> std::fmt::Display for Token<'a> { |
28 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
29 | std::fmt::Debug::fmt(self, f) |
30 | } |
31 | } |
32 | |
33 | impl<'a> Token<'a> { |
34 | fn len(&self) -> usize { |
35 | match self { |
36 | Token::Key(s: &&str) => s.len(), |
37 | Token::Value(s: &&str) => s.len() + 2, |
38 | Token::Equals | Token::OpenParen | Token::CloseParen | Token::Comma => 1, |
39 | Token::All | Token::Any | Token::Not => 3, |
40 | } |
41 | } |
42 | } |
43 | |
/// Allows iteration through a cfg expression, yielding
/// a token or a `ParseError`.
///
/// Prefer to use `Expression::parse` rather than directly
/// using the lexer
pub struct Lexer<'a> {
    /// The remaining, not-yet-lexed tail of the expression
    pub(super) inner: &'a str,
    /// The full expression (after any `cfg()` wrapper is stripped),
    /// kept so errors can report against the whole input
    original: &'a str,
    /// Byte offset of `inner` within `original`; used to build token spans
    offset: usize,
}
54 | |
55 | impl<'a> Lexer<'a> { |
56 | /// Creates a Lexer over a cfg expression, it can either be |
57 | /// a raw expression eg `key` or in attribute form, eg `cfg(key)` |
58 | pub fn new(text: &'a str) -> Self { |
59 | let text: &str = if text.starts_with("cfg(" ) && text.ends_with(')' ) { |
60 | &text[4..text.len() - 1] |
61 | } else { |
62 | text |
63 | }; |
64 | |
65 | Self { |
66 | inner: text, |
67 | original: text, |
68 | offset: 0, |
69 | } |
70 | } |
71 | } |
72 | |
/// A wrapper around a particular token that includes the span of the characters
/// in the original string, for diagnostic purposes
#[derive (Debug)]
pub struct LexerToken<'a> {
    /// The token that was lexed
    pub token: Token<'a>,
    /// The range of the token characters in the original cfg expression
    pub span: std::ops::Range<usize>,
}
82 | |
impl<'a> Iterator for Lexer<'a> {
    type Item = Result<LexerToken<'a>, ParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        // Jump over any whitespace, updating `self.inner` and `self.offset` appropriately
        let non_whitespace_index = match self.inner.find(|c: char| !c.is_whitespace()) {
            Some(idx) => idx,
            // All-whitespace remainder: consume it all so the match below sees EOF
            None => self.inner.len(),
        };

        self.inner = &self.inner[non_whitespace_index..];
        self.offset += non_whitespace_index;

        // An identifier starts with an ASCII letter or underscore...
        #[inline ]
        fn is_ident_start(ch: char) -> bool {
            ch == '_' || ch.is_ascii_lowercase() || ch.is_ascii_uppercase()
        }

        // ...and continues with ASCII alphanumerics or underscores
        #[inline ]
        fn is_ident_rest(ch: char) -> bool {
            is_ident_start(ch) || ch.is_ascii_digit()
        }

        // Lex a single raw `Token` from the front of `inner`; the `.map` at the
        // bottom attaches the span and advances past the token.
        match self.inner.chars().next() {
            // End of input: the iterator is exhausted
            None => None,
            Some('=' ) => Some(Ok(Token::Equals)),
            Some('(' ) => Some(Ok(Token::OpenParen)),
            Some(')' ) => Some(Ok(Token::CloseParen)),
            Some(',' ) => Some(Ok(Token::Comma)),
            Some(c) => {
                if c == '"' {
                    // Quoted value: look for the closing quote; the token
                    // borrows the contents without the surrounding quotes
                    match self.inner[1..].find('"' ) {
                        Some(ind) => Some(Ok(Token::Value(&self.inner[1..=ind]))),
                        None => Some(Err(ParseError {
                            original: self.original.to_owned(),
                            // Span covers the opening quote through end of input
                            span: self.offset..self.original.len(),
                            reason: Reason::UnclosedQuotes,
                        })),
                    }
                } else if is_ident_start(c) {
                    // Take the longest run of identifier characters
                    // (`ind` from a search starting at byte 1 is already the
                    // exclusive end index within the full `inner`)
                    let substr = match self.inner[1..].find(|c: char| !is_ident_rest(c)) {
                        Some(ind) => &self.inner[..=ind],
                        None => self.inner,
                    };

                    // The three predicate keywords are special-cased;
                    // any other identifier is a plain key
                    match substr {
                        "all" => Some(Ok(Token::All)),
                        "any" => Some(Ok(Token::Any)),
                        "not" => Some(Ok(Token::Not)),
                        other => Some(Ok(Token::Key(other))),
                    }
                } else {
                    // clippy tries to help here, but we need
                    // a Range here, not a RangeInclusive<>
                    #[allow (clippy::range_plus_one)]
                    Some(Err(ParseError {
                        original: self.original.to_owned(),
                        span: self.offset..self.offset + 1,
                        reason: Reason::Unexpected(&["<key>" , "all" , "any" , "not" ]),
                    }))
                }
            }
        }
        // Wrap the raw token with its span in `original`, and advance
        // `inner`/`offset` past the characters it occupied
        .map(|tok| {
            tok.map(|tok| {
                let len = tok.len();

                let start = self.offset;
                self.inner = &self.inner[len..];
                self.offset += len;

                LexerToken {
                    token: tok,
                    span: start..self.offset,
                }
            })
        })
    }
}
162 | |