//! Defines the UTF-8 decoding error type and the `bool` parsing error type.
2 | |
3 | use crate::error::Error; |
4 | use crate::fmt; |
5 | |
6 | /// Errors which can occur when attempting to interpret a sequence of [`u8`] |
7 | /// as a string. |
8 | /// |
9 | /// As such, the `from_utf8` family of functions and methods for both [`String`]s |
10 | /// and [`&str`]s make use of this error, for example. |
11 | /// |
12 | /// [`String`]: ../../std/string/struct.String.html#method.from_utf8 |
13 | /// [`&str`]: super::from_utf8 |
14 | /// |
15 | /// # Examples |
16 | /// |
17 | /// This error type’s methods can be used to create functionality |
18 | /// similar to `String::from_utf8_lossy` without allocating heap memory: |
19 | /// |
20 | /// ``` |
21 | /// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) { |
22 | /// loop { |
23 | /// match std::str::from_utf8(input) { |
24 | /// Ok(valid) => { |
25 | /// push(valid); |
26 | /// break |
27 | /// } |
28 | /// Err(error) => { |
29 | /// let (valid, after_valid) = input.split_at(error.valid_up_to()); |
30 | /// unsafe { |
31 | /// push(std::str::from_utf8_unchecked(valid)) |
32 | /// } |
33 | /// push(" \u{FFFD}" ); |
34 | /// |
35 | /// if let Some(invalid_sequence_length) = error.error_len() { |
36 | /// input = &after_valid[invalid_sequence_length..] |
37 | /// } else { |
38 | /// break |
39 | /// } |
40 | /// } |
41 | /// } |
42 | /// } |
43 | /// } |
44 | /// ``` |
45 | #[derive (Copy, Eq, PartialEq, Clone, Debug)] |
46 | #[stable (feature = "rust1" , since = "1.0.0" )] |
47 | pub struct Utf8Error { |
48 | pub(super) valid_up_to: usize, |
49 | pub(super) error_len: Option<u8>, |
50 | } |
51 | |
52 | impl Utf8Error { |
53 | /// Returns the index in the given string up to which valid UTF-8 was |
54 | /// verified. |
55 | /// |
56 | /// It is the maximum index such that `from_utf8(&input[..index])` |
57 | /// would return `Ok(_)`. |
58 | /// |
59 | /// # Examples |
60 | /// |
61 | /// Basic usage: |
62 | /// |
63 | /// ``` |
64 | /// use std::str; |
65 | /// |
66 | /// // some invalid bytes, in a vector |
67 | /// let sparkle_heart = vec![0, 159, 146, 150]; |
68 | /// |
69 | /// // std::str::from_utf8 returns a Utf8Error |
70 | /// let error = str::from_utf8(&sparkle_heart).unwrap_err(); |
71 | /// |
72 | /// // the second byte is invalid here |
73 | /// assert_eq!(1, error.valid_up_to()); |
74 | /// ``` |
75 | #[stable (feature = "utf8_error" , since = "1.5.0" )] |
76 | #[rustc_const_stable (feature = "const_str_from_utf8_shared" , since = "1.63.0" )] |
77 | #[must_use ] |
78 | #[inline ] |
79 | pub const fn valid_up_to(&self) -> usize { |
80 | self.valid_up_to |
81 | } |
82 | |
83 | /// Provides more information about the failure: |
84 | /// |
85 | /// * `None`: the end of the input was reached unexpectedly. |
86 | /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input. |
87 | /// If a byte stream (such as a file or a network socket) is being decoded incrementally, |
88 | /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks. |
89 | /// |
90 | /// * `Some(len)`: an unexpected byte was encountered. |
91 | /// The length provided is that of the invalid byte sequence |
92 | /// that starts at the index given by `valid_up_to()`. |
93 | /// Decoding should resume after that sequence |
94 | /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of |
95 | /// lossy decoding. |
96 | /// |
97 | /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html |
98 | #[stable (feature = "utf8_error_error_len" , since = "1.20.0" )] |
99 | #[rustc_const_stable (feature = "const_str_from_utf8_shared" , since = "1.63.0" )] |
100 | #[must_use ] |
101 | #[inline ] |
102 | pub const fn error_len(&self) -> Option<usize> { |
103 | // FIXME: This should become `map` again, once it's `const` |
104 | match self.error_len { |
105 | Some(len) => Some(len as usize), |
106 | None => None, |
107 | } |
108 | } |
109 | } |
110 | |
111 | #[stable (feature = "rust1" , since = "1.0.0" )] |
112 | impl fmt::Display for Utf8Error { |
113 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
114 | if let Some(error_len: u8) = self.error_len { |
115 | write!( |
116 | f, |
117 | "invalid utf-8 sequence of {} bytes from index {}" , |
118 | error_len, self.valid_up_to |
119 | ) |
120 | } else { |
121 | write!(f, "incomplete utf-8 byte sequence from index {}" , self.valid_up_to) |
122 | } |
123 | } |
124 | } |
125 | |
126 | #[stable (feature = "rust1" , since = "1.0.0" )] |
127 | impl Error for Utf8Error { |
128 | #[allow (deprecated)] |
129 | fn description(&self) -> &str { |
130 | "invalid utf-8: corrupt contents" |
131 | } |
132 | } |
133 | |
134 | /// An error returned when parsing a `bool` using [`from_str`] fails |
135 | /// |
136 | /// [`from_str`]: super::FromStr::from_str |
137 | #[derive (Debug, Clone, PartialEq, Eq)] |
138 | #[non_exhaustive ] |
139 | #[stable (feature = "rust1" , since = "1.0.0" )] |
140 | pub struct ParseBoolError; |
141 | |
142 | #[stable (feature = "rust1" , since = "1.0.0" )] |
143 | impl fmt::Display for ParseBoolError { |
144 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
145 | "provided string was not `true` or `false`" .fmt(f) |
146 | } |
147 | } |
148 | |
149 | #[stable (feature = "rust1" , since = "1.0.0" )] |
150 | impl Error for ParseBoolError { |
151 | #[allow (deprecated)] |
152 | fn description(&self) -> &str { |
153 | "failed to parse bool" |
154 | } |
155 | } |
156 | |