//! Defines the UTF-8 error type, along with the error returned when parsing a `bool` fails.
| 2 | |
| 3 | use crate::error::Error; |
| 4 | use crate::fmt; |
| 5 | |
| 6 | /// Errors which can occur when attempting to interpret a sequence of [`u8`] |
| 7 | /// as a string. |
| 8 | /// |
| 9 | /// As such, the `from_utf8` family of functions and methods for both [`String`]s |
| 10 | /// and [`&str`]s make use of this error, for example. |
| 11 | /// |
| 12 | /// [`String`]: ../../std/string/struct.String.html#method.from_utf8 |
| 13 | /// [`&str`]: super::from_utf8 |
| 14 | /// |
| 15 | /// # Examples |
| 16 | /// |
| 17 | /// This error type’s methods can be used to create functionality |
| 18 | /// similar to `String::from_utf8_lossy` without allocating heap memory: |
| 19 | /// |
| 20 | /// ``` |
| 21 | /// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) { |
| 22 | /// loop { |
| 23 | /// match std::str::from_utf8(input) { |
| 24 | /// Ok(valid) => { |
| 25 | /// push(valid); |
| 26 | /// break |
| 27 | /// } |
| 28 | /// Err(error) => { |
| 29 | /// let (valid, after_valid) = input.split_at(error.valid_up_to()); |
| 30 | /// unsafe { |
| 31 | /// push(std::str::from_utf8_unchecked(valid)) |
| 32 | /// } |
| 33 | /// push(" \u{FFFD}" ); |
| 34 | /// |
| 35 | /// if let Some(invalid_sequence_length) = error.error_len() { |
| 36 | /// input = &after_valid[invalid_sequence_length..] |
| 37 | /// } else { |
| 38 | /// break |
| 39 | /// } |
| 40 | /// } |
| 41 | /// } |
| 42 | /// } |
| 43 | /// } |
| 44 | /// ``` |
| 45 | #[derive (Copy, Eq, PartialEq, Clone, Debug)] |
| 46 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 47 | pub struct Utf8Error { |
| 48 | pub(super) valid_up_to: usize, |
| 49 | pub(super) error_len: Option<u8>, |
| 50 | } |
| 51 | |
| 52 | impl Utf8Error { |
| 53 | /// Returns the index in the given string up to which valid UTF-8 was |
| 54 | /// verified. |
| 55 | /// |
| 56 | /// It is the maximum index such that `from_utf8(&input[..index])` |
| 57 | /// would return `Ok(_)`. |
| 58 | /// |
| 59 | /// # Examples |
| 60 | /// |
| 61 | /// Basic usage: |
| 62 | /// |
| 63 | /// ``` |
| 64 | /// use std::str; |
| 65 | /// |
| 66 | /// // some invalid bytes, in a vector |
| 67 | /// let sparkle_heart = vec![0, 159, 146, 150]; |
| 68 | /// |
| 69 | /// // std::str::from_utf8 returns a Utf8Error |
| 70 | /// let error = str::from_utf8(&sparkle_heart).unwrap_err(); |
| 71 | /// |
| 72 | /// // the second byte is invalid here |
| 73 | /// assert_eq!(1, error.valid_up_to()); |
| 74 | /// ``` |
| 75 | #[stable (feature = "utf8_error" , since = "1.5.0" )] |
| 76 | #[rustc_const_stable (feature = "const_str_from_utf8_shared" , since = "1.63.0" )] |
| 77 | #[must_use ] |
| 78 | #[inline ] |
| 79 | pub const fn valid_up_to(&self) -> usize { |
| 80 | self.valid_up_to |
| 81 | } |
| 82 | |
| 83 | /// Provides more information about the failure: |
| 84 | /// |
| 85 | /// * `None`: the end of the input was reached unexpectedly. |
| 86 | /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input. |
| 87 | /// If a byte stream (such as a file or a network socket) is being decoded incrementally, |
| 88 | /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks. |
| 89 | /// |
| 90 | /// * `Some(len)`: an unexpected byte was encountered. |
| 91 | /// The length provided is that of the invalid byte sequence |
| 92 | /// that starts at the index given by `valid_up_to()`. |
| 93 | /// Decoding should resume after that sequence |
| 94 | /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of |
| 95 | /// lossy decoding. |
| 96 | /// |
| 97 | /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html |
| 98 | #[stable (feature = "utf8_error_error_len" , since = "1.20.0" )] |
| 99 | #[rustc_const_stable (feature = "const_str_from_utf8_shared" , since = "1.63.0" )] |
| 100 | #[must_use ] |
| 101 | #[inline ] |
| 102 | pub const fn error_len(&self) -> Option<usize> { |
| 103 | // FIXME(const-hack): This should become `map` again, once it's `const` |
| 104 | match self.error_len { |
| 105 | Some(len) => Some(len as usize), |
| 106 | None => None, |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 112 | impl fmt::Display for Utf8Error { |
| 113 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 114 | if let Some(error_len: u8) = self.error_len { |
| 115 | write!( |
| 116 | f, |
| 117 | "invalid utf-8 sequence of {} bytes from index {}" , |
| 118 | error_len, self.valid_up_to |
| 119 | ) |
| 120 | } else { |
| 121 | write!(f, "incomplete utf-8 byte sequence from index {}" , self.valid_up_to) |
| 122 | } |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 127 | impl Error for Utf8Error { |
| 128 | #[allow (deprecated)] |
| 129 | fn description(&self) -> &str { |
| 130 | "invalid utf-8: corrupt contents" |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | /// An error returned when parsing a `bool` using [`from_str`] fails |
| 135 | /// |
| 136 | /// [`from_str`]: super::FromStr::from_str |
| 137 | #[derive (Debug, Clone, PartialEq, Eq)] |
| 138 | #[non_exhaustive ] |
| 139 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 140 | pub struct ParseBoolError; |
| 141 | |
| 142 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 143 | impl fmt::Display for ParseBoolError { |
| 144 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 145 | "provided string was not `true` or `false`" .fmt(f) |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 150 | impl Error for ParseBoolError { |
| 151 | #[allow (deprecated)] |
| 152 | fn description(&self) -> &str { |
| 153 | "failed to parse bool" |
| 154 | } |
| 155 | } |
| 156 | |