// `byte`, `next_chr`, `parse_lit_str`, `parse_lit_str_cooked`, `parse_lit_str_raw`,
// `backslash_x` and `backslash_u` are adapted from syn:
// https://github.com/dtolnay/syn/blob/362ee2d02df3f1b2e74c7b7a4cf2ed3c106404c9/src/lit.rs#L1062-L1167
// and
// https://github.com/dtolnay/syn/blob/362ee2d02df3f1b2e74c7b7a4cf2ed3c106404c9/src/lit.rs#L1327-L1388
6 | |
/// Returns the byte at offset `idx` of `s`, or `b'\0'` when `idx` is past the
/// end of the input buffer.
///
/// The NUL fallback lets callers peek ahead without a separate bounds check.
fn byte(s: &str, idx: usize) -> u8 {
    // Checked access: an out-of-range index yields the NUL sentinel, never a panic.
    s.as_bytes().get(idx).map_or(0, |&b| b)
}
16 | |
/// Returns the first `char` of `s`, or `'\0'` when `s` is empty.
fn next_chr(s: &str) -> char {
    s.chars().next().unwrap_or('\0')
}
20 | |
21 | // Returns (content, suffix). |
22 | fn parse_lit_str(s: &str) -> String { |
23 | match byte(s, idx:0) { |
24 | b'"' => parse_lit_str_cooked(s), |
25 | b'r' => parse_lit_str_raw(s), |
26 | _ => unreachable!(), |
27 | } |
28 | } |
29 | |
30 | // Clippy false positive |
31 | // https://github.com/rust-lang-nursery/rust-clippy/issues/2329 |
32 | #[allow (clippy::needless_continue)] |
33 | fn parse_lit_str_cooked(mut s: &str) -> String { |
34 | assert_eq!(byte(s, 0), b'"' ); |
35 | s = &s[1..]; |
36 | |
37 | let mut content = String::new(); |
38 | 'outer: loop { |
39 | let ch = match byte(s, 0) { |
40 | b'"' => break, |
41 | b' \\' => { |
42 | let b = byte(s, 1); |
43 | s = &s[2..]; |
44 | match b { |
45 | b'x' => { |
46 | let (byte, rest) = backslash_x(s); |
47 | s = rest; |
48 | assert!(byte <= 0x80, "Invalid \\x byte in string literal" ); |
49 | char::from_u32(u32::from(byte)).unwrap() |
50 | } |
51 | b'u' => { |
52 | let (chr, rest) = backslash_u(s); |
53 | s = rest; |
54 | chr |
55 | } |
56 | b'n' => ' \n' , |
57 | b'r' => ' \r' , |
58 | b't' => ' \t' , |
59 | b' \\' => ' \\' , |
60 | b'0' => ' \0' , |
61 | b' \'' => ' \'' , |
62 | b'"' => '"' , |
63 | b' \r' | b' \n' => loop { |
64 | let ch = next_chr(s); |
65 | if ch.is_whitespace() { |
66 | s = &s[ch.len_utf8()..]; |
67 | } else { |
68 | continue 'outer; |
69 | } |
70 | }, |
71 | b => panic!("unexpected byte {:?} after \\ character in byte literal" , b), |
72 | } |
73 | } |
74 | b' \r' => { |
75 | assert_eq!(byte(s, 1), b' \n' , "Bare CR not allowed in string" ); |
76 | s = &s[2..]; |
77 | ' \n' |
78 | } |
79 | _ => { |
80 | let ch = next_chr(s); |
81 | s = &s[ch.len_utf8()..]; |
82 | ch |
83 | } |
84 | }; |
85 | content.push(ch); |
86 | } |
87 | |
88 | assert!(s.starts_with('"' )); |
89 | content |
90 | } |
91 | |
/// Parses the source text of a raw string literal (`r"…"`, `r#"…"#`, …) and
/// returns the text between the quotes verbatim.
///
/// `s` must start with `r`, followed by zero or more `#`, followed by `"`.
/// The last `"` in `s` is taken as the closing quote and must be followed by
/// at least as many `#` as precede the opening quote.
///
/// # Panics
///
/// Panics if `s` does not have the shape described above.
fn parse_lit_str_raw(mut s: &str) -> String {
    assert!(s.starts_with('r'), "raw string literal must start with `r`");
    s = &s[1..];

    // Number of `#` between the `r` and the opening quote.
    let pounds = s.bytes().take_while(|&b| b == b'#').count();
    assert_eq!(
        s.as_bytes().get(pounds),
        Some(&b'"'),
        "expected `\"` after the leading `#`s of a raw string literal"
    );

    // The opening quote guarantees `rfind` succeeds; if it finds that same
    // quote there is no closing quote at all.
    let close = s.rfind('"').unwrap();
    assert!(close > pounds, "unterminated raw string literal");

    // Check the closing `#`s without slicing past the end of the input.
    let closing = &s.as_bytes()[close + 1..];
    assert!(
        closing.len() >= pounds && closing[..pounds].iter().all(|&b| b == b'#'),
        "raw string literal must be closed by as many `#`s as opened it"
    );

    s[pounds + 1..close].to_owned()
}
108 | |
/// Decodes the two hex digits that follow a `\x` escape.
///
/// `s` is the input immediately after the `\x`. Returns the decoded byte and
/// the remainder of `s` after the two digits.
///
/// # Panics
///
/// Panics if either of the first two bytes of `s` is missing or is not an
/// ASCII hex digit.
fn backslash_x(s: &str) -> (u8, &str) {
    // Value of a single ASCII hex digit; a missing byte is passed in as NUL
    // and rejected like any other non-hex byte.
    fn hex_digit(b: u8) -> u8 {
        match b {
            b'0'..=b'9' => b - b'0',
            b'a'..=b'f' => 10 + (b - b'a'),
            b'A'..=b'F' => 10 + (b - b'A'),
            _ => panic!("unexpected non-hex character after \\x"),
        }
    }

    let mut bytes = s.bytes();
    let hi = hex_digit(bytes.next().unwrap_or(0));
    let lo = hex_digit(bytes.next().unwrap_or(0));

    // Both bytes are ASCII, so offset 2 is a char boundary; 0xF * 0x10 + 0xF
    // is 0xFF, so the sum cannot overflow `u8`.
    (hi * 0x10 + lo, &s[2..])
}
128 | |
/// Decodes the `{…}` body that follows a `\u` escape.
///
/// `s` is the input immediately after the `\u`. The body holds one to six hex
/// digits; `_` separators are accepted after the first digit. Returns the
/// decoded character and the remainder of `s` after the closing `}`.
///
/// # Panics
///
/// Panics if the `{` is missing, the body is empty, has more than six digits,
/// contains a non-hex byte, or names a value that is not a valid `char`.
fn backslash_u(mut s: &str) -> (char, &str) {
    if !s.starts_with('{') {
        // Passed as an argument because `{` is special in a format string.
        panic!("{}", "expected { after \\u");
    }
    s = &s[1..];

    let mut code: u32 = 0;
    let mut digits = 0;
    loop {
        // End of input reads as NUL and is rejected by the catch-all arm.
        let b = s.bytes().next().unwrap_or(0);
        let digit = match b {
            b'0'..=b'9' => b - b'0',
            b'a'..=b'f' => 10 + (b - b'a'),
            b'A'..=b'F' => 10 + (b - b'A'),
            b'_' if digits > 0 => {
                s = &s[1..];
                continue;
            }
            b'}' if digits == 0 => panic!("invalid empty unicode escape"),
            b'}' => break,
            _ => panic!("unexpected non-hex character after \\u"),
        };
        if digits == 6 {
            panic!("overlong unicode escape (must have at most 6 hex digits)");
        }
        code = code * 0x10 + u32::from(digit);
        digits += 1;
        s = &s[1..];
    }

    // The loop only exits on `}`; step past it.
    s = &s[1..];

    match char::from_u32(code) {
        Some(ch) => (ch, s),
        None => panic!("character code {:x} is not a valid unicode character", code),
    }
}
168 | |
// End of code adapted from syn
170 | |
171 | pub fn parse_lit_str_token(mut stream: proc_macro::TokenStream) -> String { |
172 | loop { |
173 | let mut iter: IntoIter = stream.into_iter(); |
174 | let token: TokenTree = iter.next().expect(msg:"expected string argument" ); |
175 | assert!(iter.next().is_none(), "unexpected trailing token" ); |
176 | let literal: Literal = match token { |
177 | proc_macro::TokenTree::Literal(literal: Literal) => literal, |
178 | proc_macro::TokenTree::Group(group: Group) => { |
179 | stream = group.stream(); |
180 | continue; |
181 | } |
182 | _ => panic!("expected string argument found ` {:?}`" , token), |
183 | }; |
184 | return parse_lit_str(&literal.to_string()); |
185 | } |
186 | } |
187 | |