1 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
2 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
3 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
4 | // option. This file may not be copied, modified, or distributed |
5 | // except according to those terms. |
6 | |
7 | use fmt; |
8 | use tendril::{Atomicity, Tendril}; |
9 | use utf8; |
10 | |
/// Decoder state for a UTF-8 sequence that was cut off at the end of a chunk.
///
/// Wraps a `utf8::Incomplete`. A value of this type is returned by
/// `decode_utf8_lossy` when its input ends in the middle of a sequence, and is
/// fed the following chunk through `IncompleteUtf8::try_complete`.
pub struct IncompleteUtf8(utf8::Incomplete);
12 | |
13 | impl<A> Tendril<fmt::Bytes, A> |
14 | where |
15 | A: Atomicity, |
16 | { |
17 | pub fn decode_utf8_lossy<F>(mut self, mut push_utf8: F) -> Option<IncompleteUtf8> |
18 | where |
19 | F: FnMut(Tendril<fmt::UTF8, A>), |
20 | { |
21 | loop { |
22 | if self.is_empty() { |
23 | return None; |
24 | } |
25 | let unborrowed_result = match utf8::decode(&self) { |
26 | Ok(s) => { |
27 | debug_assert!(s.as_ptr() == self.as_ptr()); |
28 | debug_assert!(s.len() == self.len()); |
29 | Ok(()) |
30 | } |
31 | Err(utf8::DecodeError::Invalid { |
32 | valid_prefix, |
33 | invalid_sequence, |
34 | .. |
35 | }) => { |
36 | debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); |
37 | debug_assert!(valid_prefix.len() <= self.len()); |
38 | Err(( |
39 | valid_prefix.len(), |
40 | Err(valid_prefix.len() + invalid_sequence.len()), |
41 | )) |
42 | } |
43 | Err(utf8::DecodeError::Incomplete { |
44 | valid_prefix, |
45 | incomplete_suffix, |
46 | }) => { |
47 | debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); |
48 | debug_assert!(valid_prefix.len() <= self.len()); |
49 | Err((valid_prefix.len(), Ok(incomplete_suffix))) |
50 | } |
51 | }; |
52 | match unborrowed_result { |
53 | Ok(()) => { |
54 | unsafe { push_utf8(self.reinterpret_without_validating()) } |
55 | return None; |
56 | } |
57 | Err((valid_len, and_then)) => { |
58 | if valid_len > 0 { |
59 | let subtendril = self.subtendril(0, valid_len as u32); |
60 | unsafe { push_utf8(subtendril.reinterpret_without_validating()) } |
61 | } |
62 | match and_then { |
63 | Ok(incomplete) => return Some(IncompleteUtf8(incomplete)), |
64 | Err(offset) => { |
65 | push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); |
66 | self.pop_front(offset as u32) |
67 | } |
68 | } |
69 | } |
70 | } |
71 | } |
72 | } |
73 | } |
74 | |
75 | impl IncompleteUtf8 { |
76 | pub fn try_complete<A, F>( |
77 | &mut self, |
78 | mut input: Tendril<fmt::Bytes, A>, |
79 | mut push_utf8: F, |
80 | ) -> Result<Tendril<fmt::Bytes, A>, ()> |
81 | where |
82 | A: Atomicity, |
83 | F: FnMut(Tendril<fmt::UTF8, A>), |
84 | { |
85 | let resume_at: usize; |
86 | match self.0.try_complete(&input) { |
87 | None => return Err(()), |
88 | Some((result: Result<&str, &[u8]>, rest: &[u8])) => { |
89 | push_utf8(Tendril::from_slice( |
90 | result.unwrap_or(default:utf8::REPLACEMENT_CHARACTER), |
91 | )); |
92 | resume_at = input.len() - rest.len(); |
93 | } |
94 | } |
95 | input.pop_front(resume_at as u32); |
96 | Ok(input) |
97 | } |
98 | } |
99 | |