1use super::*;
2
3/// A push-based, lossy decoder for UTF-8.
4/// Errors are replaced with the U+FFFD replacement character.
5///
6/// Users “push” bytes into the decoder, which in turn “pushes” `&str` slices into a callback.
7///
8/// For example, `String::from_utf8_lossy` (but returning `String` instead of `Cow`)
9/// can be rewritten as:
10///
11/// ```rust
12/// fn string_from_utf8_lossy(input: &[u8]) -> String {
13/// let mut string = String::new();
14/// utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input);
15/// string
16/// }
17/// ```
18///
19/// **Note:** Dropping the decoder signals the end of the input:
20/// If the last input chunk ended with an incomplete byte sequence for a code point,
21/// this is an error and a replacement character is emitted.
22/// Use `std::mem::forget` to inhibit this behavior.
23pub struct LossyDecoder<F: FnMut(&str)> {
24 push_str: F,
25 incomplete: Incomplete,
26}
27
28impl<F: FnMut(&str)> LossyDecoder<F> {
29 /// Create a new decoder from a callback.
30 #[inline]
31 pub fn new(push_str: F) -> Self {
32 LossyDecoder {
33 push_str: push_str,
34 incomplete: Incomplete {
35 buffer: [0, 0, 0, 0],
36 buffer_len: 0,
37 },
38 }
39 }
40
41 /// Feed one chunk of input into the decoder.
42 ///
43 /// The input is decoded lossily
44 /// and the callback called once or more with `&str` string slices.
45 ///
46 /// If the UTF-8 byte sequence for one code point was split into this bytes chunk
47 /// and previous bytes chunks, it will be correctly pieced back together.
48 pub fn feed(&mut self, mut input: &[u8]) {
49 if self.incomplete.buffer_len > 0 {
50 match self.incomplete.try_complete(input) {
51 Some((Ok(s), remaining)) => {
52 (self.push_str)(s);
53 input = remaining
54 }
55 Some((Err(_), remaining)) => {
56 (self.push_str)(REPLACEMENT_CHARACTER);
57 input = remaining
58 }
59 None => {
60 return
61 }
62 }
63 }
64 loop {
65 match decode(input) {
66 Ok(s) => {
67 (self.push_str)(s);
68 return
69 }
70 Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
71 (self.push_str)(valid_prefix);
72 self.incomplete = incomplete_suffix;
73 return
74 }
75 Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => {
76 (self.push_str)(valid_prefix);
77 (self.push_str)(REPLACEMENT_CHARACTER);
78 input = remaining_input
79 }
80 }
81 }
82 }
83}
84
85impl<F: FnMut(&str)> Drop for LossyDecoder<F> {
86 #[inline]
87 fn drop(&mut self) {
88 if self.incomplete.buffer_len > 0 {
89 (self.push_str)(REPLACEMENT_CHARACTER)
90 }
91 }
92}
93