1 | use crate::lib::*; |
2 | |
/// Marker bits (`10xxxxxx`) OR-ed onto every continuation byte.
const TAG_CONT: u8 = 0x80;
/// Marker bits (`110xxxxx`) OR-ed onto the lead byte of a two-byte sequence.
const TAG_TWO_B: u8 = 0xC0;
/// Marker bits (`1110xxxx`) OR-ed onto the lead byte of a three-byte sequence.
const TAG_THREE_B: u8 = 0xE0;
/// Marker bits (`11110xxx`) OR-ed onto the lead byte of a four-byte sequence.
const TAG_FOUR_B: u8 = 0xF0;
/// Exclusive upper bound of the code points encoded as a single byte (7 payload bits).
const MAX_ONE_B: u32 = 1 << 7;
/// Exclusive upper bound of the code points encoded as two bytes (11 payload bits).
const MAX_TWO_B: u32 = 1 << 11;
/// Exclusive upper bound of the code points encoded as three bytes (16 payload bits).
const MAX_THREE_B: u32 = 1 << 16;
10 | |
11 | #[inline ] |
12 | pub fn encode(c: char) -> Encode { |
13 | let code: u32 = c as u32; |
14 | let mut buf: [u8; 4] = [0; 4]; |
15 | let pos: usize = if code < MAX_ONE_B { |
16 | buf[3] = code as u8; |
17 | 3 |
18 | } else if code < MAX_TWO_B { |
19 | buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
20 | buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
21 | 2 |
22 | } else if code < MAX_THREE_B { |
23 | buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
24 | buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
25 | buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
26 | 1 |
27 | } else { |
28 | buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
29 | buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
30 | buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
31 | buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
32 | 0 |
33 | }; |
34 | Encode { buf, pos } |
35 | } |
36 | |
/// A single `char` encoded as UTF-8 in a fixed-size, right-aligned buffer.
///
/// Produced by `encode`; the encoded bytes are `buf[pos..]`.
#[derive(Clone, Copy, Debug)]
pub struct Encode {
    /// Scratch storage; only the tail starting at `pos` holds encoded bytes.
    buf: [u8; 4],
    /// Index into `buf` of the first encoded byte.
    pos: usize,
}
41 | |
42 | impl Encode { |
43 | pub fn as_str(&self) -> &str { |
44 | str::from_utf8(&self.buf[self.pos..]).unwrap() |
45 | } |
46 | } |
47 | |