1 | //! Character conversions. |
2 | |
3 | use crate::char::TryFromCharError; |
4 | use crate::error::Error; |
5 | use crate::fmt; |
6 | use crate::mem::transmute; |
7 | use crate::str::FromStr; |
8 | use crate::ub_checks::assert_unsafe_precondition; |
9 | |
10 | /// Converts a `u32` to a `char`. See [`char::from_u32`]. |
11 | #[must_use ] |
12 | #[inline ] |
13 | pub(super) const fn from_u32(i: u32) -> Option<char> { |
14 | // FIXME: once Result::ok is const fn, use it here |
15 | match char_try_from_u32(i) { |
16 | Ok(c: char) => Some(c), |
17 | Err(_) => None, |
18 | } |
19 | } |
20 | |
21 | /// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`]. |
22 | #[inline ] |
23 | #[must_use ] |
24 | pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char { |
25 | // SAFETY: the caller must guarantee that `i` is a valid char value. |
26 | unsafe { |
27 | assert_unsafe_precondition!( |
28 | check_language_ub, |
29 | "invalid value for `char`" , |
30 | (i: u32 = i) => char_try_from_u32(i).is_ok() |
31 | ); |
32 | transmute(src:i) |
33 | } |
34 | } |
35 | |
36 | #[stable (feature = "char_convert" , since = "1.13.0" )] |
37 | impl From<char> for u32 { |
38 | /// Converts a [`char`] into a [`u32`]. |
39 | /// |
40 | /// # Examples |
41 | /// |
42 | /// ``` |
43 | /// use std::mem; |
44 | /// |
45 | /// let c = 'c' ; |
46 | /// let u = u32::from(c); |
47 | /// assert!(4 == mem::size_of_val(&u)) |
48 | /// ``` |
49 | #[inline ] |
50 | fn from(c: char) -> Self { |
51 | c as u32 |
52 | } |
53 | } |
54 | |
55 | #[stable (feature = "more_char_conversions" , since = "1.51.0" )] |
56 | impl From<char> for u64 { |
57 | /// Converts a [`char`] into a [`u64`]. |
58 | /// |
59 | /// # Examples |
60 | /// |
61 | /// ``` |
62 | /// use std::mem; |
63 | /// |
64 | /// let c = '👤' ; |
65 | /// let u = u64::from(c); |
66 | /// assert!(8 == mem::size_of_val(&u)) |
67 | /// ``` |
68 | #[inline ] |
69 | fn from(c: char) -> Self { |
70 | // The char is casted to the value of the code point, then zero-extended to 64 bit. |
71 | // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics] |
72 | c as u64 |
73 | } |
74 | } |
75 | |
76 | #[stable (feature = "more_char_conversions" , since = "1.51.0" )] |
77 | impl From<char> for u128 { |
78 | /// Converts a [`char`] into a [`u128`]. |
79 | /// |
80 | /// # Examples |
81 | /// |
82 | /// ``` |
83 | /// use std::mem; |
84 | /// |
85 | /// let c = 'âš™' ; |
86 | /// let u = u128::from(c); |
87 | /// assert!(16 == mem::size_of_val(&u)) |
88 | /// ``` |
89 | #[inline ] |
90 | fn from(c: char) -> Self { |
91 | // The char is casted to the value of the code point, then zero-extended to 128 bit. |
92 | // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics] |
93 | c as u128 |
94 | } |
95 | } |
96 | |
97 | /// Maps a `char` with code point in U+0000..=U+00FF to a byte in 0x00..=0xFF with same value, |
98 | /// failing if the code point is greater than U+00FF. |
99 | /// |
100 | /// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding. |
101 | #[stable (feature = "u8_from_char" , since = "1.59.0" )] |
102 | impl TryFrom<char> for u8 { |
103 | type Error = TryFromCharError; |
104 | |
105 | /// Tries to convert a [`char`] into a [`u8`]. |
106 | /// |
107 | /// # Examples |
108 | /// |
109 | /// ``` |
110 | /// let a = 'ÿ' ; // U+00FF |
111 | /// let b = 'Ä€' ; // U+0100 |
112 | /// assert_eq!(u8::try_from(a), Ok(0xFF_u8)); |
113 | /// assert!(u8::try_from(b).is_err()); |
114 | /// ``` |
115 | #[inline ] |
116 | fn try_from(c: char) -> Result<u8, Self::Error> { |
117 | u8::try_from(u32::from(c)).map_err(|_| TryFromCharError(())) |
118 | } |
119 | } |
120 | |
121 | /// Maps a `char` with code point in U+0000..=U+FFFF to a `u16` in 0x0000..=0xFFFF with same value, |
122 | /// failing if the code point is greater than U+FFFF. |
123 | /// |
124 | /// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003. |
125 | #[stable (feature = "u16_from_char" , since = "1.74.0" )] |
126 | impl TryFrom<char> for u16 { |
127 | type Error = TryFromCharError; |
128 | |
129 | /// Tries to convert a [`char`] into a [`u16`]. |
130 | /// |
131 | /// # Examples |
132 | /// |
133 | /// ``` |
134 | /// let trans_rights = '⚧' ; // U+26A7 |
135 | /// let ninjas = '🥷' ; // U+1F977 |
136 | /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16)); |
137 | /// assert!(u16::try_from(ninjas).is_err()); |
138 | /// ``` |
139 | #[inline ] |
140 | fn try_from(c: char) -> Result<u16, Self::Error> { |
141 | u16::try_from(u32::from(c)).map_err(|_| TryFromCharError(())) |
142 | } |
143 | } |
144 | |
145 | /// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF. |
146 | /// |
147 | /// Unicode is designed such that this effectively decodes bytes |
148 | /// with the character encoding that IANA calls ISO-8859-1. |
149 | /// This encoding is compatible with ASCII. |
150 | /// |
151 | /// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), |
152 | /// which leaves some "blanks", byte values that are not assigned to any character. |
153 | /// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. |
154 | /// |
155 | /// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, |
156 | /// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks |
157 | /// to punctuation and various Latin characters. |
158 | /// |
159 | /// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) |
160 | /// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases |
161 | /// for a superset of Windows-1252 that fills the remaining blanks with corresponding |
162 | /// C0 and C1 control codes. |
163 | #[stable (feature = "char_convert" , since = "1.13.0" )] |
164 | impl From<u8> for char { |
165 | /// Converts a [`u8`] into a [`char`]. |
166 | /// |
167 | /// # Examples |
168 | /// |
169 | /// ``` |
170 | /// use std::mem; |
171 | /// |
172 | /// let u = 32 as u8; |
173 | /// let c = char::from(u); |
174 | /// assert!(4 == mem::size_of_val(&c)) |
175 | /// ``` |
176 | #[inline ] |
177 | fn from(i: u8) -> Self { |
178 | i as char |
179 | } |
180 | } |
181 | |
/// An error which can be returned when parsing a char.
///
/// This `struct` is created when using the [`char::from_str`] method.
///
/// The failure reason is kept in a private field; it is surfaced through the
/// `Error::description` and `Display` implementations for this type.
#[stable (feature = "char_from_str" , since = "1.20.0" )]
#[derive (Clone, Debug, PartialEq, Eq)]
pub struct ParseCharError {
    // Which of the parse failures occurred.
    kind: CharErrorKind,
}
190 | |
/// The reason a string could not be parsed as a single `char`.
#[derive (Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
196 | |
197 | #[stable (feature = "char_from_str" , since = "1.20.0" )] |
198 | impl Error for ParseCharError { |
199 | #[allow (deprecated)] |
200 | fn description(&self) -> &str { |
201 | match self.kind { |
202 | CharErrorKind::EmptyString => "cannot parse char from empty string" , |
203 | CharErrorKind::TooManyChars => "too many characters in string" , |
204 | } |
205 | } |
206 | } |
207 | |
208 | #[stable (feature = "char_from_str" , since = "1.20.0" )] |
209 | impl fmt::Display for ParseCharError { |
210 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
211 | #[allow (deprecated)] |
212 | self.description().fmt(f) |
213 | } |
214 | } |
215 | |
216 | #[stable (feature = "char_from_str" , since = "1.20.0" )] |
217 | impl FromStr for char { |
218 | type Err = ParseCharError; |
219 | |
220 | #[inline ] |
221 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
222 | let mut chars: Chars<'_> = s.chars(); |
223 | match (chars.next(), chars.next()) { |
224 | (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }), |
225 | (Some(c: char), None) => Ok(c), |
226 | _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }), |
227 | } |
228 | } |
229 | } |
230 | |
231 | #[inline ] |
232 | const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> { |
233 | // This is an optimized version of the check |
234 | // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF), |
235 | // which can also be written as |
236 | // i >= 0x110000 || (i >= 0xD800 && i < 0xE000). |
237 | // |
238 | // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is |
239 | // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same. |
240 | // In particular, numbers >= 0x110000 stay in this range. |
241 | // |
242 | // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single |
243 | // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped |
244 | // surrogate range as well as the numbers originally larger than 0x110000. |
245 | // |
246 | if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 { |
247 | Err(CharTryFromError(())) |
248 | } else { |
249 | // SAFETY: checked that it's a legal unicode value |
250 | Ok(unsafe { transmute(src:i) }) |
251 | } |
252 | } |
253 | |
254 | #[stable (feature = "try_from" , since = "1.34.0" )] |
255 | impl TryFrom<u32> for char { |
256 | type Error = CharTryFromError; |
257 | |
258 | #[inline ] |
259 | fn try_from(i: u32) -> Result<Self, Self::Error> { |
260 | char_try_from_u32(i) |
261 | } |
262 | } |
263 | |
/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
///
/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
/// See its documentation for more.
///
/// The single field is a private unit value, so the error carries no data.
#[stable (feature = "try_from" , since = "1.34.0" )]
#[derive (Copy, Clone, Debug, PartialEq, Eq)]
pub struct CharTryFromError(());
271 | |
272 | #[stable (feature = "try_from" , since = "1.34.0" )] |
273 | impl fmt::Display for CharTryFromError { |
274 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
275 | "converted integer out of range for `char`" .fmt(f) |
276 | } |
277 | } |
278 | |
279 | /// Converts a digit in the given radix to a `char`. See [`char::from_digit`]. |
280 | #[inline ] |
281 | #[must_use ] |
282 | pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> { |
283 | if radix > 36 { |
284 | panic!("from_digit: radix is too high (maximum 36)" ); |
285 | } |
286 | if num < radix { |
287 | let num: u8 = num as u8; |
288 | if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) } |
289 | } else { |
290 | None |
291 | } |
292 | } |
293 | |