1 | use super::*; |
2 | |
3 | /// A push-based, lossy decoder for UTF-8. |
4 | /// Errors are replaced with the U+FFFD replacement character. |
5 | /// |
6 | /// Users “push” bytes into the decoder, which in turn “pushes” `&str` slices into a callback. |
7 | /// |
8 | /// For example, `String::from_utf8_lossy` (but returning `String` instead of `Cow`) |
9 | /// can be rewritten as: |
10 | /// |
11 | /// ```rust |
12 | /// fn string_from_utf8_lossy(input: &[u8]) -> String { |
13 | /// let mut string = String::new(); |
14 | /// utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input); |
15 | /// string |
16 | /// } |
17 | /// ``` |
18 | /// |
19 | /// **Note:** Dropping the decoder signals the end of the input: |
20 | /// If the last input chunk ended with an incomplete byte sequence for a code point, |
21 | /// this is an error and a replacement character is emitted. |
22 | /// Use `std::mem::forget` to inhibit this behavior. |
23 | pub struct LossyDecoder<F: FnMut(&str)> { |
24 | push_str: F, |
25 | incomplete: Incomplete, |
26 | } |
27 | |
28 | impl<F: FnMut(&str)> LossyDecoder<F> { |
29 | /// Create a new decoder from a callback. |
30 | #[inline ] |
31 | pub fn new(push_str: F) -> Self { |
32 | LossyDecoder { |
33 | push_str: push_str, |
34 | incomplete: Incomplete { |
35 | buffer: [0, 0, 0, 0], |
36 | buffer_len: 0, |
37 | }, |
38 | } |
39 | } |
40 | |
41 | /// Feed one chunk of input into the decoder. |
42 | /// |
43 | /// The input is decoded lossily |
44 | /// and the callback called once or more with `&str` string slices. |
45 | /// |
46 | /// If the UTF-8 byte sequence for one code point was split into this bytes chunk |
47 | /// and previous bytes chunks, it will be correctly pieced back together. |
48 | pub fn feed(&mut self, mut input: &[u8]) { |
49 | if self.incomplete.buffer_len > 0 { |
50 | match self.incomplete.try_complete(input) { |
51 | Some((Ok(s), remaining)) => { |
52 | (self.push_str)(s); |
53 | input = remaining |
54 | } |
55 | Some((Err(_), remaining)) => { |
56 | (self.push_str)(REPLACEMENT_CHARACTER); |
57 | input = remaining |
58 | } |
59 | None => { |
60 | return |
61 | } |
62 | } |
63 | } |
64 | loop { |
65 | match decode(input) { |
66 | Ok(s) => { |
67 | (self.push_str)(s); |
68 | return |
69 | } |
70 | Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => { |
71 | (self.push_str)(valid_prefix); |
72 | self.incomplete = incomplete_suffix; |
73 | return |
74 | } |
75 | Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => { |
76 | (self.push_str)(valid_prefix); |
77 | (self.push_str)(REPLACEMENT_CHARACTER); |
78 | input = remaining_input |
79 | } |
80 | } |
81 | } |
82 | } |
83 | } |
84 | |
85 | impl<F: FnMut(&str)> Drop for LossyDecoder<F> { |
86 | #[inline ] |
87 | fn drop(&mut self) { |
88 | if self.incomplete.buffer_len > 0 { |
89 | (self.push_str)(REPLACEMENT_CHARACTER) |
90 | } |
91 | } |
92 | } |
93 | |