| 1 | use core::iter; | 
| 2 |  | 
|---|
| 3 | use super::{Error, Location, Spanned, SpannedValue}; | 
|---|
| 4 |  | 
|---|
| 5 | pub(super) struct Lexed<I: Iterator> { | 
|---|
| 6 | iter: iter::Peekable<I>, | 
|---|
| 7 | } | 
|---|
| 8 |  | 
|---|
| 9 | impl<I: Iterator> Iterator for Lexed<I> { | 
|---|
| 10 | type Item = I::Item; | 
|---|
| 11 |  | 
|---|
| 12 | fn next(&mut self) -> Option<Self::Item> { | 
|---|
| 13 | self.iter.next() | 
|---|
| 14 | } | 
|---|
| 15 | } | 
|---|
| 16 |  | 
|---|
| 17 | impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> { | 
|---|
| 18 | pub(super) fn peek(&mut self) -> Option<&I::Item> { | 
|---|
| 19 | self.iter.peek() | 
|---|
| 20 | } | 
|---|
| 21 |  | 
|---|
| 22 | pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> { | 
|---|
| 23 | if let Some(&Ok(Token::ComponentPart { | 
|---|
| 24 | kind: ComponentKind::Whitespace, | 
|---|
| 25 | value, | 
|---|
| 26 | })) = self.peek() | 
|---|
| 27 | { | 
|---|
| 28 | self.next(); // consume | 
|---|
| 29 | Some(value) | 
|---|
| 30 | } else { | 
|---|
| 31 | None | 
|---|
| 32 | } | 
|---|
| 33 | } | 
|---|
| 34 |  | 
|---|
| 35 | pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> { | 
|---|
| 36 | if let Some(&Ok(Token::ComponentPart { | 
|---|
| 37 | kind: ComponentKind::NotWhitespace, | 
|---|
| 38 | value, | 
|---|
| 39 | })) = self.peek() | 
|---|
| 40 | { | 
|---|
| 41 | self.next(); | 
|---|
| 42 | Some(value) | 
|---|
| 43 | } else { | 
|---|
| 44 | None | 
|---|
| 45 | } | 
|---|
| 46 | } | 
|---|
| 47 |  | 
|---|
| 48 | pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> { | 
|---|
| 49 | if let Some(&Ok(Token::Bracket { | 
|---|
| 50 | kind: BracketKind::Opening, | 
|---|
| 51 | location, | 
|---|
| 52 | })) = self.peek() | 
|---|
| 53 | { | 
|---|
| 54 | self.next(); | 
|---|
| 55 | Some(location) | 
|---|
| 56 | } else { | 
|---|
| 57 | None | 
|---|
| 58 | } | 
|---|
| 59 | } | 
|---|
| 60 |  | 
|---|
| 61 | pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> { | 
|---|
| 62 | if let Some(Ok(Token::Bracket { | 
|---|
| 63 | kind: BracketKind::Closing, | 
|---|
| 64 | location, | 
|---|
| 65 | })) = self.peek() | 
|---|
| 66 | { | 
|---|
| 67 | Some(location) | 
|---|
| 68 | } else { | 
|---|
| 69 | None | 
|---|
| 70 | } | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> { | 
|---|
| 74 | if let Some(&Ok(Token::Bracket { | 
|---|
| 75 | kind: BracketKind::Closing, | 
|---|
| 76 | location, | 
|---|
| 77 | })) = self.peek() | 
|---|
| 78 | { | 
|---|
| 79 | self.next(); | 
|---|
| 80 | Some(location) | 
|---|
| 81 | } else { | 
|---|
| 82 | None | 
|---|
| 83 | } | 
|---|
| 84 | } | 
|---|
| 85 | } | 
|---|
| 86 |  | 
|---|
| 87 | pub(super) enum Token<'a> { | 
|---|
| 88 | Literal(Spanned<&'a [u8]>), | 
|---|
| 89 | Bracket { | 
|---|
| 90 | kind: BracketKind, | 
|---|
| 91 | location: Location, | 
|---|
| 92 | }, | 
|---|
| 93 | ComponentPart { | 
|---|
| 94 | kind: ComponentKind, | 
|---|
| 95 | value: Spanned<&'a [u8]>, | 
|---|
| 96 | }, | 
|---|
| 97 | } | 
|---|
| 98 |  | 
|---|
| 99 | pub(super) enum BracketKind { | 
|---|
| 100 | Opening, | 
|---|
| 101 | Closing, | 
|---|
| 102 | } | 
|---|
| 103 |  | 
|---|
| 104 | pub(super) enum ComponentKind { | 
|---|
| 105 | #[ allow(clippy::missing_docs_in_private_items)] | 
|---|
| 106 | Whitespace, | 
|---|
| 107 | #[ allow(clippy::missing_docs_in_private_items)] | 
|---|
| 108 | NotWhitespace, | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | fn attach_location<'item>( | 
|---|
| 112 | iter: impl Iterator<Item = &'item u8>, | 
|---|
| 113 | proc_span: proc_macro::Span, | 
|---|
| 114 | ) -> impl Iterator<Item = (&'item u8, Location)> { | 
|---|
| 115 | let mut byte_pos: u32 = 0; | 
|---|
| 116 |  | 
|---|
| 117 | iter.map(move |byte: &'item u8| { | 
|---|
| 118 | let location: Location = Location { | 
|---|
| 119 | byte: byte_pos, | 
|---|
| 120 | proc_span, | 
|---|
| 121 | }; | 
|---|
| 122 | byte_pos += 1; | 
|---|
| 123 | (byte, location) | 
|---|
| 124 | }) | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 | #[ allow(clippy::unused_peekable)] // false positive | 
|---|
| 128 | pub(super) fn lex<const VERSION: u8>( | 
|---|
| 129 | mut input: &[u8], | 
|---|
| 130 | proc_span: proc_macro::Span, | 
|---|
| 131 | ) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> { | 
|---|
| 132 | assert!(version!(1..=2)); | 
|---|
| 133 |  | 
|---|
| 134 | let mut depth: u8 = 0; | 
|---|
| 135 | let mut iter = attach_location(input.iter(), proc_span).peekable(); | 
|---|
| 136 | let mut second_bracket_location = None; | 
|---|
| 137 |  | 
|---|
| 138 | let iter = iter::from_fn(move || { | 
|---|
| 139 | if version!(..=1) { | 
|---|
| 140 | if let Some(location) = second_bracket_location.take() { | 
|---|
| 141 | return Some(Ok(Token::Bracket { | 
|---|
| 142 | kind: BracketKind::Opening, | 
|---|
| 143 | location, | 
|---|
| 144 | })); | 
|---|
| 145 | } | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|
| 148 | Some(Ok(match iter.next()? { | 
|---|
| 149 | ( b'\\ ', backslash_loc) if version!(2..) => match iter.next() { | 
|---|
| 150 | Some(( b'\\ '| b'['| b']', char_loc)) => { | 
|---|
| 151 | let char = &input[1..2]; | 
|---|
| 152 | input = &input[2..]; | 
|---|
| 153 | if depth == 0 { | 
|---|
| 154 | Token::Literal(char.spanned(backslash_loc.to(char_loc))) | 
|---|
| 155 | } else { | 
|---|
| 156 | Token::ComponentPart { | 
|---|
| 157 | kind: ComponentKind::NotWhitespace, | 
|---|
| 158 | value: char.spanned(backslash_loc.to(char_loc)), | 
|---|
| 159 | } | 
|---|
| 160 | } | 
|---|
| 161 | } | 
|---|
| 162 | Some((_, loc)) => { | 
|---|
| 163 | return Some(Err(loc.error( "invalid escape sequence"))); | 
|---|
| 164 | } | 
|---|
| 165 | None => { | 
|---|
| 166 | return Some(Err(backslash_loc.error( "unexpected end of input"))); | 
|---|
| 167 | } | 
|---|
| 168 | }, | 
|---|
| 169 | ( b'[', location) if version!(..=1) => { | 
|---|
| 170 | if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[') { | 
|---|
| 171 | second_bracket_location = Some(second_location); | 
|---|
| 172 | input = &input[2..]; | 
|---|
| 173 | } else { | 
|---|
| 174 | depth += 1; | 
|---|
| 175 | input = &input[1..]; | 
|---|
| 176 | } | 
|---|
| 177 |  | 
|---|
| 178 | Token::Bracket { | 
|---|
| 179 | kind: BracketKind::Opening, | 
|---|
| 180 | location, | 
|---|
| 181 | } | 
|---|
| 182 | } | 
|---|
| 183 | ( b'[', location) => { | 
|---|
| 184 | depth += 1; | 
|---|
| 185 | input = &input[1..]; | 
|---|
| 186 |  | 
|---|
| 187 | Token::Bracket { | 
|---|
| 188 | kind: BracketKind::Opening, | 
|---|
| 189 | location, | 
|---|
| 190 | } | 
|---|
| 191 | } | 
|---|
| 192 | ( b']', location) if depth > 0 => { | 
|---|
| 193 | depth -= 1; | 
|---|
| 194 | input = &input[1..]; | 
|---|
| 195 |  | 
|---|
| 196 | Token::Bracket { | 
|---|
| 197 | kind: BracketKind::Closing, | 
|---|
| 198 | location, | 
|---|
| 199 | } | 
|---|
| 200 | } | 
|---|
| 201 | (_, start_location) if depth == 0 => { | 
|---|
| 202 | let mut bytes = 1; | 
|---|
| 203 | let mut end_location = start_location; | 
|---|
| 204 |  | 
|---|
| 205 | while let Some((_, location)) = | 
|---|
| 206 | iter.next_if(|&(&byte, _)| !((version!(2..) && byte == b'\\ ') || byte == b'[')) | 
|---|
| 207 | { | 
|---|
| 208 | end_location = location; | 
|---|
| 209 | bytes += 1; | 
|---|
| 210 | } | 
|---|
| 211 |  | 
|---|
| 212 | let value = &input[..bytes]; | 
|---|
| 213 | input = &input[bytes..]; | 
|---|
| 214 |  | 
|---|
| 215 | Token::Literal(value.spanned(start_location.to(end_location))) | 
|---|
| 216 | } | 
|---|
| 217 | (byte, start_location) => { | 
|---|
| 218 | let mut bytes = 1; | 
|---|
| 219 | let mut end_location = start_location; | 
|---|
| 220 | let is_whitespace = byte.is_ascii_whitespace(); | 
|---|
| 221 |  | 
|---|
| 222 | while let Some((_, location)) = iter.next_if(|&(byte, _)| { | 
|---|
| 223 | !matches!(byte, b'\\ '| b'['| b']') | 
|---|
| 224 | && is_whitespace == byte.is_ascii_whitespace() | 
|---|
| 225 | }) { | 
|---|
| 226 | end_location = location; | 
|---|
| 227 | bytes += 1; | 
|---|
| 228 | } | 
|---|
| 229 |  | 
|---|
| 230 | let value = &input[..bytes]; | 
|---|
| 231 | input = &input[bytes..]; | 
|---|
| 232 |  | 
|---|
| 233 | Token::ComponentPart { | 
|---|
| 234 | kind: if is_whitespace { | 
|---|
| 235 | ComponentKind::Whitespace | 
|---|
| 236 | } else { | 
|---|
| 237 | ComponentKind::NotWhitespace | 
|---|
| 238 | }, | 
|---|
| 239 | value: value.spanned(start_location.to(end_location)), | 
|---|
| 240 | } | 
|---|
| 241 | } | 
|---|
| 242 | })) | 
|---|
| 243 | }); | 
|---|
| 244 |  | 
|---|
| 245 | Lexed { | 
|---|
| 246 | iter: iter.peekable(), | 
|---|
| 247 | } | 
|---|
| 248 | } | 
|---|
| 249 |  | 
|---|