1 | use std::ops::{Index, RangeFrom}; |
2 | |
3 | use proc_macro::Span; |
4 | |
5 | use crate::Error; |
6 | |
7 | pub(crate) fn parse(token: &proc_macro::Literal) -> Result<(Span, Vec<u8>), Error> { |
8 | let span: Span = token.span(); |
9 | let repr: String = token.to_string(); |
10 | |
11 | match repr.as_bytes() { |
12 | [b'"' , ..] => Ok((span, parse_lit_str_cooked(&repr[1..]))), |
13 | [b'b' , b'"' , rest: &[u8] @ ..] => Ok((span, parse_lit_byte_str_cooked(rest))), |
14 | [b'r' , rest: &[u8] @ ..] | [b'b' , b'r' , rest: &[u8] @ ..] => Ok((span, parse_lit_str_raw(rest))), |
15 | _ => Err(Error::ExpectedString { |
16 | span_start: Some(span), |
17 | span_end: Some(span), |
18 | }), |
19 | } |
20 | } |
21 | |
/// Returns the byte at position `idx` of `s`, or `0` when `idx` is out of bounds.
///
/// The `0` fallback lets callers peek past the end of the input without a separate length
/// check.
fn byte(s: impl AsRef<[u8]>, idx: usize) -> u8 {
    s.as_ref().get(idx).copied().unwrap_or_default()
}
25 | |
26 | fn parse_lit_str_cooked(mut s: &str) -> Vec<u8> { |
27 | let mut content = String::new(); |
28 | 'outer: loop { |
29 | let ch = match byte(s, 0) { |
30 | b'"' => break, |
31 | b' \\' => { |
32 | let b = byte(s, 1); |
33 | s = &s[2..]; |
34 | match b { |
35 | b'x' => { |
36 | let (byte, rest) = backslash_x(s); |
37 | s = rest; |
38 | char::from_u32(u32::from(byte)).expect("byte was just validated" ) |
39 | } |
40 | b'u' => { |
41 | let (chr, rest) = backslash_u(s); |
42 | s = rest; |
43 | chr |
44 | } |
45 | b'n' => ' \n' , |
46 | b'r' => ' \r' , |
47 | b't' => ' \t' , |
48 | b' \\' => ' \\' , |
49 | b'0' => ' \0' , |
50 | b' \'' => ' \'' , |
51 | b'"' => '"' , |
52 | b' \r' | b' \n' => loop { |
53 | let ch = s.chars().next().unwrap_or_default(); |
54 | if ch.is_whitespace() { |
55 | s = &s[ch.len_utf8()..]; |
56 | } else { |
57 | continue 'outer; |
58 | } |
59 | }, |
60 | _ => bug!("invalid escape" ), |
61 | } |
62 | } |
63 | b' \r' => { |
64 | // bare CR not permitted |
65 | s = &s[2..]; |
66 | ' \n' |
67 | } |
68 | _ => { |
69 | let ch = s.chars().next().unwrap_or_default(); |
70 | s = &s[ch.len_utf8()..]; |
71 | ch |
72 | } |
73 | }; |
74 | content.push(ch); |
75 | } |
76 | |
77 | content.into_bytes() |
78 | } |
79 | |
/// Extracts the verbatim content of a raw (byte) string literal.
///
/// `s` must start immediately after the `r` prefix, i.e. at the leading `#`s (if any) followed
/// by the opening `"`. The returned bytes are everything between that opening quote and the
/// last `"` in `s`; no escape processing takes place.
///
/// # Panics
///
/// Panics if `s` contains no `"` at all, or (via slice indexing) if the last `"` precedes the
/// position just after the opening quote.
fn parse_lit_str_raw(s: &[u8]) -> Vec<u8> {
    // Number of `#`s in front of the opening quote.
    let pounds = s.iter().take_while(|&&b| b == b'#').count();
    // The closing quote is the last one; trailing `#`s contain no quotes.
    let close = s
        .iter()
        .rposition(|&b| b == b'"')
        .expect("had a string without trailing \"");

    s[pounds + 1..close].to_owned()
}
92 | |
93 | fn parse_lit_byte_str_cooked(mut v: &[u8]) -> Vec<u8> { |
94 | let mut out = Vec::new(); |
95 | 'outer: loop { |
96 | let byte = match byte(v, 0) { |
97 | b'"' => break, |
98 | b' \\' => { |
99 | let b = byte(v, 1); |
100 | v = &v[2..]; |
101 | match b { |
102 | b'x' => { |
103 | let (byte, rest) = backslash_x(v); |
104 | v = rest; |
105 | byte |
106 | } |
107 | b'n' => b' \n' , |
108 | b'r' => b' \r' , |
109 | b't' => b' \t' , |
110 | b' \\' => b' \\' , |
111 | b'0' => b' \0' , |
112 | b' \'' => b' \'' , |
113 | b'"' => b'"' , |
114 | b' \r' | b' \n' => loop { |
115 | let byte = byte(v, 0); |
116 | let ch = char::from_u32(u32::from(byte)).expect("invalid byte" ); |
117 | if ch.is_whitespace() { |
118 | v = &v[1..]; |
119 | } else { |
120 | continue 'outer; |
121 | } |
122 | }, |
123 | _ => bug!("invalid escape" ), |
124 | } |
125 | } |
126 | b' \r' => { |
127 | // bare CR not permitted |
128 | v = &v[2..]; |
129 | b' \n' |
130 | } |
131 | b => { |
132 | v = &v[1..]; |
133 | b |
134 | } |
135 | }; |
136 | out.push(byte); |
137 | } |
138 | |
139 | out |
140 | } |
141 | |
142 | fn backslash_x<S>(s: &S) -> (u8, &S) |
143 | where |
144 | S: Index<RangeFrom<usize>, Output = S> + AsRef<[u8]> + ?Sized, |
145 | { |
146 | let mut ch: u8 = 0; |
147 | let b0: u8 = byte(s, idx:0); |
148 | let b1: u8 = byte(s, idx:1); |
149 | ch += 0x10 * (b0 - b'0' ); |
150 | ch += match b1 { |
151 | b'0' ..=b'9' => b1 - b'0' , |
152 | b'a' ..=b'f' => 10 + (b1 - b'a' ), |
153 | b'A' ..=b'F' => 10 + (b1 - b'A' ), |
154 | _ => bug!("invalid hex escape" ), |
155 | }; |
156 | (ch, &s[2..]) |
157 | } |
158 | |
159 | fn backslash_u(mut s: &str) -> (char, &str) { |
160 | s = &s[1..]; |
161 | |
162 | let mut ch = 0; |
163 | let mut digits = 0; |
164 | loop { |
165 | let b = byte(s, 0); |
166 | let digit = match b { |
167 | b'0' ..=b'9' => b - b'0' , |
168 | b'a' ..=b'f' => 10 + b - b'a' , |
169 | b'A' ..=b'F' => 10 + b - b'A' , |
170 | b'_' if digits > 0 => { |
171 | s = &s[1..]; |
172 | continue; |
173 | } |
174 | b'}' if digits != 0 => break, |
175 | _ => bug!("invalid unicode escape" ), |
176 | }; |
177 | ch *= 0x10; |
178 | ch += u32::from(digit); |
179 | digits += 1; |
180 | s = &s[1..]; |
181 | } |
182 | s = &s[1..]; |
183 | |
184 | ( |
185 | char::from_u32(ch).expect("invalid unicode escape passed by compiler" ), |
186 | s, |
187 | ) |
188 | } |
189 | |