1//! Character conversions.
2
3use crate::char::TryFromCharError;
4use crate::error::Error;
5use crate::fmt;
6use crate::mem::transmute;
7use crate::str::FromStr;
8use crate::ub_checks::assert_unsafe_precondition;
9
10/// Converts a `u32` to a `char`. See [`char::from_u32`].
11#[must_use]
12#[inline]
13pub(super) const fn from_u32(i: u32) -> Option<char> {
14 // FIXME(const-hack): once Result::ok is const fn, use it here
15 match char_try_from_u32(i) {
16 Ok(c: char) => Some(c),
17 Err(_) => None,
18 }
19}
20
21/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
22#[inline]
23#[must_use]
24#[allow(unnecessary_transmutes)]
25#[track_caller]
26pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
27 // SAFETY: the caller must guarantee that `i` is a valid char value.
28 unsafe {
29 assert_unsafe_precondition!(
30 check_language_ub,
31 "invalid value for `char`",
32 (i: u32 = i) => char_try_from_u32(i).is_ok()
33 );
34 transmute(src:i)
35 }
36}
37
38#[stable(feature = "char_convert", since = "1.13.0")]
39impl From<char> for u32 {
40 /// Converts a [`char`] into a [`u32`].
41 ///
42 /// # Examples
43 ///
44 /// ```
45 /// let c = 'c';
46 /// let u = u32::from(c);
47 /// assert!(4 == size_of_val(&u))
48 /// ```
49 #[inline]
50 fn from(c: char) -> Self {
51 c as u32
52 }
53}
54
55#[stable(feature = "more_char_conversions", since = "1.51.0")]
56impl From<char> for u64 {
57 /// Converts a [`char`] into a [`u64`].
58 ///
59 /// # Examples
60 ///
61 /// ```
62 /// let c = '👤';
63 /// let u = u64::from(c);
64 /// assert!(8 == size_of_val(&u))
65 /// ```
66 #[inline]
67 fn from(c: char) -> Self {
68 // The char is casted to the value of the code point, then zero-extended to 64 bit.
69 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
70 c as u64
71 }
72}
73
74#[stable(feature = "more_char_conversions", since = "1.51.0")]
75impl From<char> for u128 {
76 /// Converts a [`char`] into a [`u128`].
77 ///
78 /// # Examples
79 ///
80 /// ```
81 /// let c = 'âš™';
82 /// let u = u128::from(c);
83 /// assert!(16 == size_of_val(&u))
84 /// ```
85 #[inline]
86 fn from(c: char) -> Self {
87 // The char is casted to the value of the code point, then zero-extended to 128 bit.
88 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
89 c as u128
90 }
91}
92
93/// Maps a `char` with code point in U+0000..=U+00FF to a byte in 0x00..=0xFF with same value,
94/// failing if the code point is greater than U+00FF.
95///
96/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
97#[stable(feature = "u8_from_char", since = "1.59.0")]
98impl TryFrom<char> for u8 {
99 type Error = TryFromCharError;
100
101 /// Tries to convert a [`char`] into a [`u8`].
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// let a = 'ÿ'; // U+00FF
107 /// let b = 'Ä€'; // U+0100
108 /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
109 /// assert!(u8::try_from(b).is_err());
110 /// ```
111 #[inline]
112 fn try_from(c: char) -> Result<u8, Self::Error> {
113 u8::try_from(u32::from(c)).map_err(|_| TryFromCharError(()))
114 }
115}
116
117/// Maps a `char` with code point in U+0000..=U+FFFF to a `u16` in 0x0000..=0xFFFF with same value,
118/// failing if the code point is greater than U+FFFF.
119///
120/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
121#[stable(feature = "u16_from_char", since = "1.74.0")]
122impl TryFrom<char> for u16 {
123 type Error = TryFromCharError;
124
125 /// Tries to convert a [`char`] into a [`u16`].
126 ///
127 /// # Examples
128 ///
129 /// ```
130 /// let trans_rights = 'âš§'; // U+26A7
131 /// let ninjas = '🥷'; // U+1F977
132 /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
133 /// assert!(u16::try_from(ninjas).is_err());
134 /// ```
135 #[inline]
136 fn try_from(c: char) -> Result<u16, Self::Error> {
137 u16::try_from(u32::from(c)).map_err(|_| TryFromCharError(()))
138 }
139}
140
/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
///
/// Unicode is designed such that this effectively decodes bytes
/// with the character encoding that IANA calls ISO-8859-1.
/// This encoding is compatible with ASCII.
///
/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
/// which leaves some "blanks", byte values that are not assigned to any character.
/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
///
/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
/// which is a superset of ISO/IEC 8859-1 that assigns some (not all!) blanks
/// to punctuation and various Latin characters.
///
/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
/// C0 and C1 control codes.
#[stable(feature = "char_convert", since = "1.13.0")]
impl From<u8> for char {
    /// Converts a [`u8`] into a [`char`].
    ///
    /// # Examples
    ///
    /// ```
    /// let u = 32 as u8;
    /// let c = char::from(u);
    /// assert!(4 == size_of_val(&c))
    /// ```
    #[inline]
    fn from(i: u8) -> Self {
        // The byte value becomes the code point of the resulting `char`.
        i as char
    }
}
175
/// An error which can be returned when parsing a char.
///
/// This `struct` is created when using the [`char::from_str`] method.
#[stable(feature = "char_from_str", since = "1.20.0")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseCharError {
    // Records which failure case occurred; private, and used to pick the message
    // returned by `description`.
    kind: CharErrorKind,
}
184
/// The reason a string could not be parsed as a single `char`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string yielded no characters at all.
    EmptyString,
    /// The input string yielded more than one character.
    TooManyChars,
}
190
191#[stable(feature = "char_from_str", since = "1.20.0")]
192impl Error for ParseCharError {
193 #[allow(deprecated)]
194 fn description(&self) -> &str {
195 match self.kind {
196 CharErrorKind::EmptyString => "cannot parse char from empty string",
197 CharErrorKind::TooManyChars => "too many characters in string",
198 }
199 }
200}
201
202#[stable(feature = "char_from_str", since = "1.20.0")]
203impl fmt::Display for ParseCharError {
204 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
205 #[allow(deprecated)]
206 self.description().fmt(f)
207 }
208}
209
210#[stable(feature = "char_from_str", since = "1.20.0")]
211impl FromStr for char {
212 type Err = ParseCharError;
213
214 #[inline]
215 fn from_str(s: &str) -> Result<Self, Self::Err> {
216 let mut chars: Chars<'_> = s.chars();
217 match (chars.next(), chars.next()) {
218 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
219 (Some(c: char), None) => Ok(c),
220 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
221 }
222 }
223}
224
225#[inline]
226#[allow(unnecessary_transmutes)]
227const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
228 // This is an optimized version of the check
229 // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
230 // which can also be written as
231 // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
232 //
233 // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
234 // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
235 // In particular, numbers >= 0x110000 stay in this range.
236 //
237 // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
238 // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
239 // surrogate range as well as the numbers originally larger than 0x110000.
240 //
241 if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
242 Err(CharTryFromError(()))
243 } else {
244 // SAFETY: checked that it's a legal unicode value
245 Ok(unsafe { transmute(src:i) })
246 }
247}
248
#[stable(feature = "try_from", since = "1.34.0")]
impl TryFrom<u32> for char {
    type Error = CharTryFromError;

    /// Tries to convert a `u32` into a `char`.
    ///
    /// Delegates to `char_try_from_u32`, so it fails with [`CharTryFromError`]
    /// exactly when that check rejects `i`.
    #[inline]
    fn try_from(i: u32) -> Result<Self, Self::Error> {
        char_try_from_u32(i)
    }
}
258
/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
///
/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
/// See its documentation for more.
#[stable(feature = "try_from", since = "1.34.0")]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
// The single `()` field is private and carries no data.
pub struct CharTryFromError(());
266
267#[stable(feature = "try_from", since = "1.34.0")]
268impl fmt::Display for CharTryFromError {
269 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
270 "converted integer out of range for `char`".fmt(f)
271 }
272}
273
274/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
275#[inline]
276#[must_use]
277pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
278 if radix > 36 {
279 panic!("from_digit: radix is too high (maximum 36)");
280 }
281 if num < radix {
282 let num: u8 = num as u8;
283 if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
284 } else {
285 None
286 }
287}
288

Provided by KDAB

Privacy Policy
Learn Rust with the experts
Find out more