convert.rs source code [crates/core/src/char/convert.rs]

1	//! Character conversions.
2
3	use crate::char::TryFromCharError;
4	use crate::error::Error;
5	use crate::fmt;
6	use crate::mem::transmute;
7	use crate::str::FromStr;
8	use crate::ub_checks::assert_unsafe_precondition;
9
10	/// Converts a `u32` to a `char`. See [`char::from_u32`].
11	#[must_use]
12	#[inline]
13	pub(super) const fn from_u32(i: u32) -> Option<char> {
14	// FIXME(const-hack): once Result::ok is const fn, use it here
15	match char_try_from_u32(i) {
16	Ok(c: char) => Some(c),
17	Err(_) => None,
18	}
19	}
20
21	/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
22	#[inline]
23	#[must_use]
24	pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
25	// SAFETY: the caller must guarantee that `i` is a valid char value.
26	unsafe {
27	assert_unsafe_precondition!(
28	check_language_ub,
29	"invalid value for `char`",
30	(i: u32 = i) => char_try_from_u32(i).is_ok()
31	);
32	transmute(src:i)
33	}
34	}
35
36	#[stable(feature = "char_convert", since = "1.13.0")]
37	impl From<char> for u32 {
38	/// Converts a [`char`] into a [`u32`].
39	///
40	/// # Examples
41	///
42	/// ```
43	/// let c = 'c';
44	/// let u = u32::from(c);
45	/// assert!(`4` == size_of_val(&u))
46	/// ```
47	#[inline]
48	fn from(c: char) -> Self {
49	c as u32
50	}
51	}
52
53	#[stable(feature = "more_char_conversions", since = "1.51.0")]
54	impl From<char> for u64 {
55	/// Converts a [`char`] into a [`u64`].
56	///
57	/// # Examples
58	///
59	/// ```
60	/// let c = '👤';
61	/// let u = u64::from(c);
62	/// assert!(`8` == size_of_val(&u))
63	/// ```
64	#[inline]
65	fn from(c: char) -> Self {
66	// The char is casted to the value of the code point, then zero-extended to 64 bit.
67	// See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
68	c as u64
69	}
70	}
71
72	#[stable(feature = "more_char_conversions", since = "1.51.0")]
73	impl From<char> for u128 {
74	/// Converts a [`char`] into a [`u128`].
75	///
76	/// # Examples
77	///
78	/// ```
79	/// let c = '⚙';
80	/// let u = u128::from(c);
81	/// assert!(`16` == size_of_val(&u))
82	/// ```
83	#[inline]
84	fn from(c: char) -> Self {
85	// The char is casted to the value of the code point, then zero-extended to 128 bit.
86	// See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
87	c as u128
88	}
89	}
90
91	/// Maps a `char` with code point in U+0000..=U+00FF to a byte in 0x00..=0xFF with same value,
92	/// failing if the code point is greater than U+00FF.
93	///
94	/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
95	#[stable(feature = "u8_from_char", since = "1.59.0")]
96	impl TryFrom<char> for u8 {
97	type Error = TryFromCharError;
98
99	/// Tries to convert a [`char`] into a [`u8`].
100	///
101	/// # Examples
102	///
103	/// ```
104	/// let a = 'ÿ'; // U+00FF
105	/// let b = 'Ā'; // U+0100
106	/// assert_eq!(u8::try_from(a), Ok(`0xFF_u8`));
107	/// assert!(u8::try_from(b).is_err());
108	/// ```
109	#[inline]
110	fn try_from(c: char) -> Result<u8, Self::Error> {
111	u8::try_from(u32::from(c)).map_err(\|_\| TryFromCharError(()))
112	}
113	}
114
115	/// Maps a `char` with code point in U+0000..=U+FFFF to a `u16` in 0x0000..=0xFFFF with same value,
116	/// failing if the code point is greater than U+FFFF.
117	///
118	/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
119	#[stable(feature = "u16_from_char", since = "1.74.0")]
120	impl TryFrom<char> for u16 {
121	type Error = TryFromCharError;
122
123	/// Tries to convert a [`char`] into a [`u16`].
124	///
125	/// # Examples
126	///
127	/// ```
128	/// let trans_rights = '⚧'; // U+26A7
129	/// let ninjas = '🥷'; // U+1F977
130	/// assert_eq!(u16::try_from(trans_rights), Ok(`0x26A7_u16`));
131	/// assert!(u16::try_from(ninjas).is_err());
132	/// ```
133	#[inline]
134	fn try_from(c: char) -> Result<u16, Self::Error> {
135	u16::try_from(u32::from(c)).map_err(\|_\| TryFromCharError(()))
136	}
137	}
138
139	/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
140	///
141	/// Unicode is designed such that this effectively decodes bytes
142	/// with the character encoding that IANA calls ISO-8859-1.
143	/// This encoding is compatible with ASCII.
144	///
145	/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
146	/// which leaves some "blanks", byte values that are not assigned to any character.
147	/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
148	///
149	/// Note that this is also* different from Windows-1252 a.k.a. code page 1252,*
150	/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
151	/// to punctuation and various Latin characters.
152	///
153	/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
154	/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
155	/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
156	/// C0 and C1 control codes.
157	#[stable(feature = "char_convert", since = "1.13.0")]
158	impl From<u8> for char {
159	/// Converts a [`u8`] into a [`char`].
160	///
161	/// # Examples
162	///
163	/// ```
164	/// let u = `32` as u8;
165	/// let c = char::from(u);
166	/// assert!(`4` == size_of_val(&c))
167	/// ```
168	#[inline]
169	fn from(i: u8) -> Self {
170	i as char
171	}
172	}
173
174	/// An error which can be returned when parsing a char.
175	///
176	/// This `struct` is created when using the [`char::from_str`] method.
177	#[stable(feature = "char_from_str", since = "1.20.0")]
178	#[derive(Clone, Debug, PartialEq, Eq)]
179	pub struct ParseCharError {
180	kind: CharErrorKind,
181	}
182
// Private discriminator for `ParseCharError`: the reasons `char::from_str` can fail.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    // The input string contained no characters.
    EmptyString,
    // The input string contained more than one character.
    TooManyChars,
}
188
189	#[stable(feature = "char_from_str", since = "1.20.0")]
190	impl Error for ParseCharError {
191	#[allow(deprecated)]
192	fn description(&self) -> &str {
193	match self.kind {
194	CharErrorKind::EmptyString => "cannot parse char from empty string",
195	CharErrorKind::TooManyChars => "too many characters in string",
196	}
197	}
198	}
199
200	#[stable(feature = "char_from_str", since = "1.20.0")]
201	impl fmt::Display for ParseCharError {
202	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203	#[allow(deprecated)]
204	self.description().fmt(f)
205	}
206	}
207
208	#[stable(feature = "char_from_str", since = "1.20.0")]
209	impl FromStr for char {
210	type Err = ParseCharError;
211
212	#[inline]
213	fn from_str(s: &str) -> Result<Self, Self::Err> {
214	let mut chars: Chars<'_> = s.chars();
215	match (chars.next(), chars.next()) {
216	(None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
217	(Some(c: char), None) => Ok(c),
218	_ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
219	}
220	}
221	}
222
223	#[inline]
224	const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
225	// This is an optimized version of the check
226	// (i > MAX as u32) \|\| (i >= 0xD800 && i <= 0xDFFF),
227	// which can also be written as
228	// i >= 0x110000 \|\| (i >= 0xD800 && i < 0xE000).
229	//
230	// The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
231	// mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
232	// In particular, numbers >= 0x110000 stay in this range.
233	//
234	// Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
235	// unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
236	// surrogate range as well as the numbers originally larger than 0x110000.
237	//
238	if (i ^ `0xD800`).wrapping_sub(`0x800`) >= `0x110000` - `0x800` {
239	Err(CharTryFromError(()))
240	} else {
241	// SAFETY: checked that it's a legal unicode value
242	Ok(unsafe { transmute(src:i) })
243	}
244	}
245
246	#[stable(feature = "try_from", since = "1.34.0")]
247	impl TryFrom<u32> for char {
248	type Error = CharTryFromError;
249
250	#[inline]
251	fn try_from(i: u32) -> Result<Self, Self::Error> {
252	char_try_from_u32(i)
253	}
254	}
255
256	/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
257	///
258	/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
259	/// See its documentation for more.
260	#[stable(feature = "try_from", since = "1.34.0")]
261	#[derive(Copy, Clone, Debug, PartialEq, Eq)]
262	pub struct CharTryFromError(());
263
264	#[stable(feature = "try_from", since = "1.34.0")]
265	impl fmt::Display for CharTryFromError {
266	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
267	"converted integer out of range for `char`".fmt(f)
268	}
269	}
270
271	/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
272	#[inline]
273	#[must_use]
274	pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
275	if radix > `36` {
276	panic!("from_digit: radix is too high (maximum 36)");
277	}
278	if num < radix {
279	let num: u8 = num as u8;
280	if num < `10` { Some((b'0' + num) as char) } else { Some((b'a' + num - `10`) as char) }
281	} else {
282	None
283	}
284	}
285

Provided by KDAB

Definitions