//! Fork of the equivalent file from the proc-macro2 crate.
//! Modified to support line number counting in `Cursor`.
//! Also contains some functions from `stable.rs` of proc-macro2.
| 4 | |
| 5 | #![allow (dead_code)] // Why is this needed ? |
| 6 | |
| 7 | use std::str::{Bytes, CharIndices, Chars}; |
| 8 | |
| 9 | use unicode_xid::UnicodeXID; |
| 10 | |
/// Error returned by the lexing functions in this module when the input at the
/// cursor cannot be recognised.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// Value of [`Cursor::line`] for the cursor at which the failure was reported.
    pub line: u32,
}
| 15 | |
/// A cheap, copyable view of the not-yet-consumed input together with its
/// position.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    /// The remaining, unconsumed input.
    pub rest: &'a str,
    /// Byte offset of `rest`, accumulated by [`Cursor::advance`].
    pub off: u32,
    /// Line counter; grows by one for every `'\n'` consumed.
    pub line: u32,
    /// Number of bytes consumed since the last `'\n'` (when no newline has
    /// been consumed yet, this counts on from the initial column value).
    pub column: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor moved forward by `amt` bytes, with `off`, `line` and
    /// `column` updated for the text that was skipped.
    ///
    /// # Panics
    ///
    /// Panics if `amt` is greater than the remaining length or does not fall
    /// on a UTF-8 character boundary.
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        let (consumed, rest) = self.rest.split_at(amt);
        // Counting bytes is safe: 0x0A never occurs inside a multi-byte UTF-8
        // sequence, so this equals the number of '\n' characters.
        let newlines = consumed.bytes().filter(|&b| b == b'\n').count() as u32;
        let column = match consumed.rfind('\n') {
            // Still on the same line: keep counting from the current column.
            None => self.column + amt as u32,
            // Column restarts right after the last newline that was skipped.
            Some(i) => (amt - i) as u32 - 1,
        };
        Cursor {
            rest,
            off: self.off + amt as u32,
            line: self.line + newlines,
            column,
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining input.
    pub fn find(&self, p: char) -> Option<usize> {
        self.rest.find(p)
    }

    /// Whether the remaining input begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether all input has been consumed.
    pub fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Length in bytes of the remaining input.
    pub fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    pub fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the characters of the remaining input.
    pub fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte offset, char)` pairs of the remaining input.
    pub fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }
}
| 78 | |
| 79 | pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>; |
| 80 | |
| 81 | pub fn whitespace(input: Cursor) -> PResult<()> { |
| 82 | if input.is_empty() { |
| 83 | return Err(LexError { line: input.line }); |
| 84 | } |
| 85 | |
| 86 | let bytes = input.as_bytes(); |
| 87 | let mut i = 0; |
| 88 | while i < bytes.len() { |
| 89 | let s = input.advance(i); |
| 90 | if bytes[i] == b'/' { |
| 91 | if s.starts_with("//" ) |
| 92 | // && (!s.starts_with("///") || s.starts_with("////")) |
| 93 | // && !s.starts_with("//!") |
| 94 | { |
| 95 | if let Some(len) = s.find(' \n' ) { |
| 96 | i += len + 1; |
| 97 | continue; |
| 98 | } |
| 99 | break; |
| 100 | } else if s.starts_with("/**/" ) { |
| 101 | i += 4; |
| 102 | continue; |
| 103 | } else if s.starts_with("/*" ) |
| 104 | // && (!s.starts_with("/**") || s.starts_with("/***")) |
| 105 | // && !s.starts_with("/*!") |
| 106 | { |
| 107 | let (_, com) = block_comment(s)?; |
| 108 | i += com.len(); |
| 109 | continue; |
| 110 | } |
| 111 | } |
| 112 | match bytes[i] { |
| 113 | b' ' | 0x09..=0x0d => { |
| 114 | i += 1; |
| 115 | continue; |
| 116 | } |
| 117 | b if b <= 0x7f => {} |
| 118 | _ => { |
| 119 | let ch = s.chars().next().unwrap(); |
| 120 | if is_whitespace(ch) { |
| 121 | i += ch.len_utf8(); |
| 122 | continue; |
| 123 | } |
| 124 | } |
| 125 | } |
| 126 | return if i > 0 { Ok((s, ())) } else { Err(LexError { line: s.line }) }; |
| 127 | } |
| 128 | Ok((input.advance(input.len()), ())) |
| 129 | } |
| 130 | |
| 131 | pub fn block_comment(input: Cursor) -> PResult<&str> { |
| 132 | if !input.starts_with("/*" ) { |
| 133 | return Err(LexError { line: input.line }); |
| 134 | } |
| 135 | |
| 136 | let mut depth: i32 = 0; |
| 137 | let bytes: &[u8] = input.as_bytes(); |
| 138 | let mut i: usize = 0; |
| 139 | let upper: usize = bytes.len() - 1; |
| 140 | while i < upper { |
| 141 | if bytes[i] == b'/' && bytes[i + 1] == b'*' { |
| 142 | depth += 1; |
| 143 | i += 1; // eat '*' |
| 144 | } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { |
| 145 | depth -= 1; |
| 146 | if depth == 0 { |
| 147 | return Ok((input.advance(amt:i + 2), &input.rest[..i + 2])); |
| 148 | } |
| 149 | i += 1; // eat '/' |
| 150 | } |
| 151 | i += 1; |
| 152 | } |
| 153 | Err(LexError { line: input.line }) |
| 154 | } |
| 155 | |
| 156 | pub fn skip_whitespace(input: Cursor) -> Cursor { |
| 157 | match whitespace(input) { |
| 158 | Ok((rest: Cursor<'_>, _)) => rest, |
| 159 | Err(_) => input, |
| 160 | } |
| 161 | } |
| 162 | |
/// Whether `ch` counts as whitespace for the lexer: anything
/// `char::is_whitespace` accepts, plus U+200E and U+200F.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}'
}
| 167 | |
| 168 | // --- functions from stable.rs |
| 169 | |
| 170 | #[inline ] |
| 171 | fn is_ident_start(c: char) -> bool { |
| 172 | c.is_ascii_alphabetic() || c == '_' || (c > ' \x7f' && UnicodeXID::is_xid_start(self:c)) |
| 173 | } |
| 174 | |
| 175 | #[inline ] |
| 176 | fn is_ident_continue(c: char) -> bool { |
| 177 | c.is_ascii_alphanumeric() || c == '_' || (c > ' \x7f' && UnicodeXID::is_xid_continue(self:c)) |
| 178 | } |
| 179 | |
| 180 | pub fn symbol(input: Cursor) -> PResult<&str> { |
| 181 | let mut chars = input.char_indices(); |
| 182 | |
| 183 | let raw = input.starts_with("r#" ); |
| 184 | if raw { |
| 185 | chars.next(); |
| 186 | chars.next(); |
| 187 | } |
| 188 | |
| 189 | match chars.next() { |
| 190 | Some((_, ch)) if is_ident_start(ch) => {} |
| 191 | _ => return Err(LexError { line: input.line }), |
| 192 | } |
| 193 | |
| 194 | let mut end = input.len(); |
| 195 | for (i, ch) in chars { |
| 196 | if !is_ident_continue(ch) { |
| 197 | end = i; |
| 198 | break; |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | let a = &input.rest[..end]; |
| 203 | if a == "r#_" { |
| 204 | Err(LexError { line: input.line }) |
| 205 | } else { |
| 206 | let ident = if raw { &a[2..] } else { a }; |
| 207 | Ok((input.advance(end), ident)) |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | pub fn cooked_string(input: Cursor) -> PResult<()> { |
| 212 | let mut chars = input.char_indices().peekable(); |
| 213 | while let Some((byte_offset, ch)) = chars.next() { |
| 214 | match ch { |
| 215 | '"' => { |
| 216 | return Ok((input.advance(byte_offset), ())); |
| 217 | } |
| 218 | ' \r' => { |
| 219 | if let Some((_, ' \n' )) = chars.next() { |
| 220 | // ... |
| 221 | } else { |
| 222 | break; |
| 223 | } |
| 224 | } |
| 225 | ' \\' => match chars.next() { |
| 226 | Some((_, 'x' )) => { |
| 227 | if !backslash_x_char(&mut chars) { |
| 228 | break; |
| 229 | } |
| 230 | } |
| 231 | Some((_, 'n' )) | Some((_, 'r' )) | Some((_, 't' )) | Some((_, ' \\' )) |
| 232 | | Some((_, ' \'' )) | Some((_, '"' )) | Some((_, '0' )) => {} |
| 233 | Some((_, 'u' )) => { |
| 234 | if !backslash_u(&mut chars) { |
| 235 | break; |
| 236 | } |
| 237 | } |
| 238 | Some((_, ' \n' )) | Some((_, ' \r' )) => { |
| 239 | while let Some(&(_, ch)) = chars.peek() { |
| 240 | if ch.is_whitespace() { |
| 241 | chars.next(); |
| 242 | } else { |
| 243 | break; |
| 244 | } |
| 245 | } |
| 246 | } |
| 247 | _ => break, |
| 248 | }, |
| 249 | _ch => {} |
| 250 | } |
| 251 | } |
| 252 | Err(LexError { line: input.line }) |
| 253 | } |
| 254 | |
| 255 | pub fn cooked_byte_string(mut input: Cursor) -> PResult<()> { |
| 256 | let mut bytes = input.bytes().enumerate(); |
| 257 | 'outer: while let Some((offset, b)) = bytes.next() { |
| 258 | match b { |
| 259 | b'"' => { |
| 260 | return Ok((input.advance(offset), ())); |
| 261 | } |
| 262 | b' \r' => { |
| 263 | if let Some((_, b' \n' )) = bytes.next() { |
| 264 | // ... |
| 265 | } else { |
| 266 | break; |
| 267 | } |
| 268 | } |
| 269 | b' \\' => match bytes.next() { |
| 270 | Some((_, b'x' )) => { |
| 271 | if !backslash_x_byte(&mut bytes) { |
| 272 | break; |
| 273 | } |
| 274 | } |
| 275 | Some((_, b'n' )) | Some((_, b'r' )) | Some((_, b't' )) | Some((_, b' \\' )) |
| 276 | | Some((_, b'0' )) | Some((_, b' \'' )) | Some((_, b'"' )) => {} |
| 277 | Some((newline, b' \n' )) | Some((newline, b' \r' )) => { |
| 278 | let rest = input.advance(newline + 1); |
| 279 | for (offset, ch) in rest.char_indices() { |
| 280 | if !ch.is_whitespace() { |
| 281 | input = rest.advance(offset); |
| 282 | bytes = input.bytes().enumerate(); |
| 283 | continue 'outer; |
| 284 | } |
| 285 | } |
| 286 | break; |
| 287 | } |
| 288 | _ => break, |
| 289 | }, |
| 290 | b if b < 0x80 => {} |
| 291 | _ => break, |
| 292 | } |
| 293 | } |
| 294 | Err(LexError { line: input.line }) |
| 295 | } |
| 296 | |
| 297 | pub fn raw_string(input: Cursor) -> PResult<()> { |
| 298 | let mut chars = input.char_indices(); |
| 299 | let mut n = 0; |
| 300 | #[allow (clippy::while_let_on_iterator)] //chars is used in the next loop |
| 301 | while let Some((byte_offset, ch)) = chars.next() { |
| 302 | match ch { |
| 303 | '"' => { |
| 304 | n = byte_offset; |
| 305 | break; |
| 306 | } |
| 307 | '#' => {} |
| 308 | _ => return Err(LexError { line: input.line }), |
| 309 | } |
| 310 | } |
| 311 | for (byte_offset, ch) in chars { |
| 312 | match ch { |
| 313 | '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => { |
| 314 | let rest = input.advance(byte_offset + 1 + n); |
| 315 | return Ok((rest, ())); |
| 316 | } |
| 317 | ' \r' => {} |
| 318 | _ => {} |
| 319 | } |
| 320 | } |
| 321 | Err(LexError { line: input.line }) |
| 322 | } |
| 323 | |
| 324 | pub fn cooked_byte(input: Cursor) -> PResult<()> { |
| 325 | let mut bytes = input.bytes().enumerate(); |
| 326 | let ok = match bytes.next().map(|(_, b)| b) { |
| 327 | Some(b' \\' ) => match bytes.next().map(|(_, b)| b) { |
| 328 | Some(b'x' ) => backslash_x_byte(&mut bytes), |
| 329 | Some(b'n' ) | Some(b'r' ) | Some(b't' ) | Some(b' \\' ) | Some(b'0' ) | Some(b' \'' ) |
| 330 | | Some(b'"' ) => true, |
| 331 | _ => false, |
| 332 | }, |
| 333 | b => b.is_some(), |
| 334 | }; |
| 335 | if ok { |
| 336 | match bytes.next() { |
| 337 | Some((offset, _)) => { |
| 338 | if input.chars().as_str().is_char_boundary(offset) { |
| 339 | Ok((input.advance(offset), ())) |
| 340 | } else { |
| 341 | Err(LexError { line: input.line }) |
| 342 | } |
| 343 | } |
| 344 | None => Ok((input.advance(input.len()), ())), |
| 345 | } |
| 346 | } else { |
| 347 | Err(LexError { line: input.line }) |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | pub fn cooked_char(input: Cursor) -> PResult<()> { |
| 352 | let mut chars: CharIndices<'_> = input.char_indices(); |
| 353 | let ok: bool = match chars.next().map(|(_, ch: char)| ch) { |
| 354 | Some(' \\' ) => match chars.next().map(|(_, ch: char)| ch) { |
| 355 | Some('x' ) => backslash_x_char(&mut chars), |
| 356 | Some('u' ) => backslash_u(&mut chars), |
| 357 | Some('n' ) | Some('r' ) | Some('t' ) | Some(' \\' ) | Some('0' ) | Some(' \'' ) | Some('"' ) => { |
| 358 | true |
| 359 | } |
| 360 | _ => false, |
| 361 | }, |
| 362 | ch: Option => ch.is_some(), |
| 363 | }; |
| 364 | if ok { |
| 365 | match chars.next() { |
| 366 | Some((idx: usize, _)) => Ok((input.advance(amt:idx), ())), |
| 367 | None => Ok((input.advance(amt:input.len()), ())), |
| 368 | } |
| 369 | } else { |
| 370 | Err(LexError { line: input.line }) |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | macro_rules! next_ch { |
| 375 | ($chars:ident @ $pat:pat $(| $rest:pat)*) => { |
| 376 | match $chars.next() { |
| 377 | Some((_, ch)) => match ch { |
| 378 | $pat $(| $rest)* => ch, |
| 379 | _ => return false, |
| 380 | }, |
| 381 | None => return false |
| 382 | } |
| 383 | }; |
| 384 | } |
| 385 | |
| 386 | fn backslash_x_char<I>(chars: &mut I) -> bool |
| 387 | where |
| 388 | I: Iterator<Item = (usize, char)>, |
| 389 | { |
| 390 | next_ch!(chars @ '0' ..='7' ); |
| 391 | next_ch!(chars @ '0' ..='9' | 'a' ..='f' | 'A' ..='F' ); |
| 392 | true |
| 393 | } |
| 394 | |
| 395 | fn backslash_x_byte<I>(chars: &mut I) -> bool |
| 396 | where |
| 397 | I: Iterator<Item = (usize, u8)>, |
| 398 | { |
| 399 | next_ch!(chars @ b'0' ..=b'9' | b'a' ..=b'f' | b'A' ..=b'F' ); |
| 400 | next_ch!(chars @ b'0' ..=b'9' | b'a' ..=b'f' | b'A' ..=b'F' ); |
| 401 | true |
| 402 | } |
| 403 | |
| 404 | fn backslash_u<I>(chars: &mut I) -> bool |
| 405 | where |
| 406 | I: Iterator<Item = (usize, char)>, |
| 407 | { |
| 408 | next_ch!(chars @ '{' ); |
| 409 | next_ch!(chars @ '0' ..='9' | 'a' ..='f' | 'A' ..='F' ); |
| 410 | loop { |
| 411 | let c: char = next_ch!(chars @ '0' ..='9' | 'a' ..='f' | 'A' ..='F' | '_' | '}' ); |
| 412 | if c == '}' { |
| 413 | return true; |
| 414 | } |
| 415 | } |
| 416 | } |
| 417 | |