| 1 | use super::*; |
| 2 | |
| 3 | /// A push-based, lossy decoder for UTF-8. |
| 4 | /// Errors are replaced with the U+FFFD replacement character. |
| 5 | /// |
| 6 | /// Users “push” bytes into the decoder, which in turn “pushes” `&str` slices into a callback. |
| 7 | /// |
| 8 | /// For example, `String::from_utf8_lossy` (but returning `String` instead of `Cow`) |
| 9 | /// can be rewritten as: |
| 10 | /// |
| 11 | /// ```rust |
| 12 | /// fn string_from_utf8_lossy(input: &[u8]) -> String { |
| 13 | /// let mut string = String::new(); |
| 14 | /// utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input); |
| 15 | /// string |
| 16 | /// } |
| 17 | /// ``` |
| 18 | /// |
| 19 | /// **Note:** Dropping the decoder signals the end of the input: |
| 20 | /// If the last input chunk ended with an incomplete byte sequence for a code point, |
| 21 | /// this is an error and a replacement character is emitted. |
| 22 | /// Use `std::mem::forget` to inhibit this behavior. |
| 23 | pub struct LossyDecoder<F: FnMut(&str)> { |
| 24 | push_str: F, |
| 25 | incomplete: Incomplete, |
| 26 | } |
| 27 | |
| 28 | impl<F: FnMut(&str)> LossyDecoder<F> { |
| 29 | /// Create a new decoder from a callback. |
| 30 | #[inline ] |
| 31 | pub fn new(push_str: F) -> Self { |
| 32 | LossyDecoder { |
| 33 | push_str: push_str, |
| 34 | incomplete: Incomplete { |
| 35 | buffer: [0, 0, 0, 0], |
| 36 | buffer_len: 0, |
| 37 | }, |
| 38 | } |
| 39 | } |
| 40 | |
| 41 | /// Feed one chunk of input into the decoder. |
| 42 | /// |
| 43 | /// The input is decoded lossily |
| 44 | /// and the callback called once or more with `&str` string slices. |
| 45 | /// |
| 46 | /// If the UTF-8 byte sequence for one code point was split into this bytes chunk |
| 47 | /// and previous bytes chunks, it will be correctly pieced back together. |
| 48 | pub fn feed(&mut self, mut input: &[u8]) { |
| 49 | if self.incomplete.buffer_len > 0 { |
| 50 | match self.incomplete.try_complete(input) { |
| 51 | Some((Ok(s), remaining)) => { |
| 52 | (self.push_str)(s); |
| 53 | input = remaining |
| 54 | } |
| 55 | Some((Err(_), remaining)) => { |
| 56 | (self.push_str)(REPLACEMENT_CHARACTER); |
| 57 | input = remaining |
| 58 | } |
| 59 | None => { |
| 60 | return |
| 61 | } |
| 62 | } |
| 63 | } |
| 64 | loop { |
| 65 | match decode(input) { |
| 66 | Ok(s) => { |
| 67 | (self.push_str)(s); |
| 68 | return |
| 69 | } |
| 70 | Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => { |
| 71 | (self.push_str)(valid_prefix); |
| 72 | self.incomplete = incomplete_suffix; |
| 73 | return |
| 74 | } |
| 75 | Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => { |
| 76 | (self.push_str)(valid_prefix); |
| 77 | (self.push_str)(REPLACEMENT_CHARACTER); |
| 78 | input = remaining_input |
| 79 | } |
| 80 | } |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | impl<F: FnMut(&str)> Drop for LossyDecoder<F> { |
| 86 | #[inline ] |
| 87 | fn drop(&mut self) { |
| 88 | if self.incomplete.buffer_len > 0 { |
| 89 | (self.push_str)(REPLACEMENT_CHARACTER) |
| 90 | } |
| 91 | } |
| 92 | } |
| 93 | |