| 1 | // `byte`, `next_chr`, `parse_lit_str`, `parse_lit_str_cooked` and `parse_lit_str_raw` are adapted |
| 2 | // from syn: |
| 3 | // https://github.com/dtolnay/syn/blob/362ee2d02df3f1b2e74c7b7a4cf2ed3c106404c9/src/lit.rs#L1062-L1167 |
| 4 | // and |
| 5 | // https://github.com/dtolnay/syn/blob/362ee2d02df3f1b2e74c7b7a4cf2ed3c106404c9/src/lit.rs#L1327-L1388 |
| 6 | |
/// Returns the byte of `s` located at offset `idx`.
///
/// Looking at or past the end of `s` yields `b'\0'` rather than panicking, which lets
/// callers peek ahead without a separate length check.
fn byte(s: &str, idx: usize) -> u8 {
    s.as_bytes().get(idx).copied().unwrap_or(0)
}
| 16 | |
/// Returns the first `char` of `s`, or `'\0'` when `s` is empty.
fn next_chr(s: &str) -> char {
    // `char::default()` is `'\0'`, the same stand-in `byte` uses for "no more input".
    s.chars().next().unwrap_or_default()
}
| 20 | |
| 21 | // Returns (content, suffix). |
| 22 | fn parse_lit_str(s: &str) -> String { |
| 23 | match byte(s, idx:0) { |
| 24 | b'"' => parse_lit_str_cooked(s), |
| 25 | b'r' => parse_lit_str_raw(s), |
| 26 | _ => unreachable!(), |
| 27 | } |
| 28 | } |
| 29 | |
| 30 | // Clippy false positive |
| 31 | // https://github.com/rust-lang-nursery/rust-clippy/issues/2329 |
| 32 | #[allow (clippy::needless_continue)] |
| 33 | fn parse_lit_str_cooked(mut s: &str) -> String { |
| 34 | assert_eq!(byte(s, 0), b'"' ); |
| 35 | s = &s[1..]; |
| 36 | |
| 37 | let mut content = String::new(); |
| 38 | 'outer: loop { |
| 39 | let ch = match byte(s, 0) { |
| 40 | b'"' => break, |
| 41 | b' \\' => { |
| 42 | let b = byte(s, 1); |
| 43 | s = &s[2..]; |
| 44 | match b { |
| 45 | b'x' => { |
| 46 | let (byte, rest) = backslash_x(s); |
| 47 | s = rest; |
| 48 | assert!(byte <= 0x80, "Invalid \\x byte in string literal" ); |
| 49 | char::from_u32(u32::from(byte)).unwrap() |
| 50 | } |
| 51 | b'u' => { |
| 52 | let (chr, rest) = backslash_u(s); |
| 53 | s = rest; |
| 54 | chr |
| 55 | } |
| 56 | b'n' => ' \n' , |
| 57 | b'r' => ' \r' , |
| 58 | b't' => ' \t' , |
| 59 | b' \\' => ' \\' , |
| 60 | b'0' => ' \0' , |
| 61 | b' \'' => ' \'' , |
| 62 | b'"' => '"' , |
| 63 | b' \r' | b' \n' => loop { |
| 64 | let ch = next_chr(s); |
| 65 | if ch.is_whitespace() { |
| 66 | s = &s[ch.len_utf8()..]; |
| 67 | } else { |
| 68 | continue 'outer; |
| 69 | } |
| 70 | }, |
| 71 | b => panic!("unexpected byte {:?} after \\ character in byte literal" , b), |
| 72 | } |
| 73 | } |
| 74 | b' \r' => { |
| 75 | assert_eq!(byte(s, 1), b' \n' , "Bare CR not allowed in string" ); |
| 76 | s = &s[2..]; |
| 77 | ' \n' |
| 78 | } |
| 79 | _ => { |
| 80 | let ch = next_chr(s); |
| 81 | s = &s[ch.len_utf8()..]; |
| 82 | ch |
| 83 | } |
| 84 | }; |
| 85 | content.push(ch); |
| 86 | } |
| 87 | |
| 88 | assert!(s.starts_with('"' )); |
| 89 | content |
| 90 | } |
| 91 | |
/// Parses a raw string literal (`r"..."`, `r#"..."#`, ...) and returns the text between
/// its quotes, unmodified.
///
/// `s` is the literal's source text starting at the `r`. The closing quote is taken to be
/// the last `"` in `s`, and it must be followed by at least as many `#` as precede the
/// opening quote.
///
/// # Panics
///
/// Panics if `s` does not start with `r`, if the opening quote is missing, if there is no
/// closing quote, or if the closing `#` run is too short or contains another byte.
fn parse_lit_str_raw(mut s: &str) -> String {
    assert_eq!(s.as_bytes().first().copied().unwrap_or(0), b'r');
    s = &s[1..];

    // Count the `#` run that precedes the opening quote.
    let raw = s.as_bytes();
    let pounds = raw.iter().take_while(|&&b| b == b'#').count();
    assert_eq!(raw.get(pounds).copied().unwrap_or(0), b'"');

    let close = s.rfind('"').unwrap();
    let fence_start = close + 1;
    let fence = &s[fence_start..fence_start + pounds];
    fence.bytes().for_each(|end| assert_eq!(end, b'#'));

    s[pounds + 1..close].to_owned()
}
| 108 | |
/// Decodes the two hex digits of a `\x` escape.
///
/// `s` must begin immediately after the `x`. Returns the decoded byte together with the
/// remainder of `s` following the two digits.
///
/// # Panics
///
/// Panics if either of the first two bytes of `s` is missing or is not an ASCII hex digit.
fn backslash_x(s: &str) -> (u8, &str) {
    // Numeric value of one ASCII hex digit. A `0` byte stands in for "past the end of the
    // input" and is rejected like any other non-hex byte.
    fn nibble(digit: u8) -> u8 {
        match digit {
            b'0'..=b'9' => digit - b'0',
            b'a'..=b'f' => digit - b'a' + 10,
            b'A'..=b'F' => digit - b'A' + 10,
            _ => panic!("unexpected non-hex character after \\x"),
        }
    }

    let raw = s.as_bytes();
    let high = nibble(raw.first().copied().unwrap_or(0));
    let low = nibble(raw.get(1).copied().unwrap_or(0));
    ((high << 4) | low, &s[2..])
}
| 128 | |
/// Decodes the braced part of a `\u{...}` escape.
///
/// `s` must begin immediately after the `u`, i.e. at the `{`. Between the braces, one to six
/// hex digits are accepted; `_` separators are allowed anywhere after the first digit.
/// Returns the decoded character together with the remainder of `s` after the `}`.
///
/// # Panics
///
/// Panics if the `{` is missing, the escape is empty, a non-hex character is found (this
/// includes a leading `_` and running out of input), more than six digits are given, or the
/// value is not a valid Unicode scalar value.
fn backslash_u(mut s: &str) -> (char, &str) {
    if !s.starts_with('{') {
        panic!("{}", "expected { after \\u");
    }
    s = &s[1..];

    let mut code: u32 = 0;
    let mut seen = 0;
    loop {
        // A `0` byte stands in for "past the end of the input" and is rejected below.
        let current = s.as_bytes().first().copied().unwrap_or(0);
        let value = match current {
            b'}' if seen == 0 => panic!("invalid empty unicode escape"),
            b'}' => break,
            b'_' if seen > 0 => {
                s = &s[1..];
                continue;
            }
            other => match char::from(other).to_digit(16) {
                Some(value) => value,
                None => panic!("unexpected non-hex character after \\u"),
            },
        };
        if seen == 6 {
            panic!("overlong unicode escape (must have at most 6 hex digits)");
        }
        code = code * 0x10 + value;
        seen += 1;
        s = &s[1..];
    }
    assert!(s.starts_with('}'));
    s = &s[1..];

    match char::from_u32(code) {
        Some(chr) => (chr, s),
        None => panic!("character code {:x} is not a valid unicode character", code),
    }
}
| 168 | |
| 169 | // End of code adapted from syn |
| 170 | |
| 171 | pub fn parse_lit_str_token(mut stream: proc_macro::TokenStream) -> String { |
| 172 | loop { |
| 173 | let mut iter: IntoIter = stream.into_iter(); |
| 174 | let token: TokenTree = iter.next().expect(msg:"expected string argument" ); |
| 175 | assert!(iter.next().is_none(), "unexpected trailing token" ); |
| 176 | let literal: Literal = match token { |
| 177 | proc_macro::TokenTree::Literal(literal: Literal) => literal, |
| 178 | proc_macro::TokenTree::Group(group: Group) => { |
| 179 | stream = group.stream(); |
| 180 | continue; |
| 181 | } |
| 182 | _ => panic!("expected string argument found ` {:?}`" , token), |
| 183 | }; |
| 184 | return parse_lit_str(&literal.to_string()); |
| 185 | } |
| 186 | } |
| 187 | |