| 1 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 2 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 3 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 4 | // option. This file may not be copied, modified, or distributed |
| 5 | // except according to those terms. |
| 6 | |
| 7 | use fmt; |
| 8 | use tendril::{Atomicity, Tendril}; |
| 9 | use utf8; |
| 10 | |
| 11 | pub struct IncompleteUtf8(utf8::Incomplete); |
| 12 | |
| 13 | impl<A> Tendril<fmt::Bytes, A> |
| 14 | where |
| 15 | A: Atomicity, |
| 16 | { |
| 17 | pub fn decode_utf8_lossy<F>(mut self, mut push_utf8: F) -> Option<IncompleteUtf8> |
| 18 | where |
| 19 | F: FnMut(Tendril<fmt::UTF8, A>), |
| 20 | { |
| 21 | loop { |
| 22 | if self.is_empty() { |
| 23 | return None; |
| 24 | } |
| 25 | let unborrowed_result = match utf8::decode(&self) { |
| 26 | Ok(s) => { |
| 27 | debug_assert!(s.as_ptr() == self.as_ptr()); |
| 28 | debug_assert!(s.len() == self.len()); |
| 29 | Ok(()) |
| 30 | } |
| 31 | Err(utf8::DecodeError::Invalid { |
| 32 | valid_prefix, |
| 33 | invalid_sequence, |
| 34 | .. |
| 35 | }) => { |
| 36 | debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); |
| 37 | debug_assert!(valid_prefix.len() <= self.len()); |
| 38 | Err(( |
| 39 | valid_prefix.len(), |
| 40 | Err(valid_prefix.len() + invalid_sequence.len()), |
| 41 | )) |
| 42 | } |
| 43 | Err(utf8::DecodeError::Incomplete { |
| 44 | valid_prefix, |
| 45 | incomplete_suffix, |
| 46 | }) => { |
| 47 | debug_assert!(valid_prefix.as_ptr() == self.as_ptr()); |
| 48 | debug_assert!(valid_prefix.len() <= self.len()); |
| 49 | Err((valid_prefix.len(), Ok(incomplete_suffix))) |
| 50 | } |
| 51 | }; |
| 52 | match unborrowed_result { |
| 53 | Ok(()) => { |
| 54 | unsafe { push_utf8(self.reinterpret_without_validating()) } |
| 55 | return None; |
| 56 | } |
| 57 | Err((valid_len, and_then)) => { |
| 58 | if valid_len > 0 { |
| 59 | let subtendril = self.subtendril(0, valid_len as u32); |
| 60 | unsafe { push_utf8(subtendril.reinterpret_without_validating()) } |
| 61 | } |
| 62 | match and_then { |
| 63 | Ok(incomplete) => return Some(IncompleteUtf8(incomplete)), |
| 64 | Err(offset) => { |
| 65 | push_utf8(Tendril::from_slice(utf8::REPLACEMENT_CHARACTER)); |
| 66 | self.pop_front(offset as u32) |
| 67 | } |
| 68 | } |
| 69 | } |
| 70 | } |
| 71 | } |
| 72 | } |
| 73 | } |
| 74 | |
| 75 | impl IncompleteUtf8 { |
| 76 | pub fn try_complete<A, F>( |
| 77 | &mut self, |
| 78 | mut input: Tendril<fmt::Bytes, A>, |
| 79 | mut push_utf8: F, |
| 80 | ) -> Result<Tendril<fmt::Bytes, A>, ()> |
| 81 | where |
| 82 | A: Atomicity, |
| 83 | F: FnMut(Tendril<fmt::UTF8, A>), |
| 84 | { |
| 85 | let resume_at: usize; |
| 86 | match self.0.try_complete(&input) { |
| 87 | None => return Err(()), |
| 88 | Some((result: Result<&str, &[u8]>, rest: &[u8])) => { |
| 89 | push_utf8(Tendril::from_slice( |
| 90 | result.unwrap_or(default:utf8::REPLACEMENT_CHARACTER), |
| 91 | )); |
| 92 | resume_at = input.len() - rest.len(); |
| 93 | } |
| 94 | } |
| 95 | input.pop_front(resume_at as u32); |
| 96 | Ok(input) |
| 97 | } |
| 98 | } |
| 99 | |