mod.rs source code [crates/core/src/char/mod.rs]

1	//! Utilities for the `char` primitive type.
2	//!
3	//! *[See also the `char` primitive type](primitive@char).*
4	//!
5	//! The `char` type represents a single character. More specifically, since
6	//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7	//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8	//! point]'.
9	//!
10	//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11	//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12	//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
15	//!
16	//! This module is the home of the iterator implementations for the iterators
17	//! implemented on `char`, as well as some useful constants and conversion
18	//! functions that convert various types to `char`.
19
20	#![allow(non_snake_case)]
21	#![stable(feature = "core_char", since = "1.2.0")]
22
23	mod convert;
24	mod decode;
25	mod methods;
26
27	// stable re-exports
28	#[stable(feature = "try_from", since = "1.34.0")]
29	pub use self::convert::CharTryFromError;
30	#[stable(feature = "char_from_str", since = "1.20.0")]
31	pub use self::convert::ParseCharError;
32	#[stable(feature = "decode_utf16", since = "1.9.0")]
33	pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
34
35	// perma-unstable re-exports
36	#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
37	pub use self::methods::encode_utf16_raw;
38	#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39	pub use self::methods::encode_utf8_raw;
40
41	use crate::ascii;
42	use crate::error::Error;
43	use crate::escape;
44	use crate::fmt::{self, Write};
45	use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
46	use crate::num::NonZero;
47
48	pub(crate) use self::methods::EscapeDebugExtArgs;
49
// UTF-8 ranges and tags for encoding characters

/// Marker bits of a UTF-8 continuation byte (`10xxxxxx`).
const TAG_CONT: u8 = 0b1000_0000;
/// Marker bits of the lead byte of a two-byte UTF-8 sequence (`110xxxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
/// Marker bits of the lead byte of a three-byte UTF-8 sequence (`1110xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
/// Marker bits of the lead byte of a four-byte UTF-8 sequence (`11110xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;
/// Exclusive upper bound of the code points that fit in one UTF-8 byte.
const MAX_ONE_B: u32 = 0x80;
/// Exclusive upper bound of the code points that fit in two UTF-8 bytes.
const MAX_TWO_B: u32 = 0x800;
/// Exclusive upper bound of the code points that fit in three UTF-8 bytes.
const MAX_THREE_B: u32 = 0x10000;
58
59	/*
60	Lu Uppercase_Letter an uppercase letter
61	Ll Lowercase_Letter a lowercase letter
62	Lt Titlecase_Letter a digraphic character, with first part uppercase
63	Lm Modifier_Letter a modifier letter
64	Lo Other_Letter other letters, including syllables and ideographs
65	Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
66	Mc Spacing_Mark a spacing combining mark (positive advance width)
67	Me Enclosing_Mark an enclosing combining mark
68	Nd Decimal_Number a decimal digit
69	Nl Letter_Number a letterlike numeric character
70	No Other_Number a numeric character of other type
71	Pc Connector_Punctuation a connecting punctuation mark, like a tie
72	Pd Dash_Punctuation a dash or hyphen punctuation mark
73	Ps Open_Punctuation an opening punctuation mark (of a pair)
74	Pe Close_Punctuation a closing punctuation mark (of a pair)
75	Pi Initial_Punctuation an initial quotation mark
76	Pf Final_Punctuation a final quotation mark
77	Po Other_Punctuation a punctuation mark of other type
78	Sm Math_Symbol a symbol of primarily mathematical use
79	Sc Currency_Symbol a currency sign
80	Sk Modifier_Symbol a non-letterlike modifier symbol
81	So Other_Symbol a symbol of other type
82	Zs Space_Separator a space character (of various non-zero widths)
83	Zl Line_Separator U+2028 LINE SEPARATOR only
84	Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
85	Cc Control a C0 or C1 control code
86	Cf Format a format control character
87	Cs Surrogate a surrogate code point
88	Co Private_Use a private-use character
89	Cn Unassigned a reserved unassigned code point or a noncharacter
90	*/
91
92	/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
93	#[stable(feature = "rust1", since = "1.0.0")]
94	pub const MAX: char = char::MAX;
95
96	/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
97	/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
98	#[stable(feature = "decode_utf16", since = "1.9.0")]
99	pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
100
101	/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
102	/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
103	#[stable(feature = "unicode_version", since = "1.45.0")]
104	pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
105
106	/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
107	/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
108	#[stable(feature = "decode_utf16", since = "1.9.0")]
109	#[inline]
110	pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
111	self::decode::decode_utf16(iter)
112	}
113
114	/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
115	#[stable(feature = "rust1", since = "1.0.0")]
116	#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
117	#[must_use]
118	#[inline]
119	pub const fn from_u32(i: u32) -> Option<char> {
120	self::convert::from_u32(i)
121	}
122
123	/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`].
124	/// instead.
125	#[stable(feature = "char_from_unchecked", since = "1.5.0")]
126	#[rustc_const_unstable(feature = "const_char_from_u32_unchecked", issue = "89259")]
127	#[must_use]
128	#[inline]
129	pub const unsafe fn from_u32_unchecked(i: u32) -> char {
130	// SAFETY: the safety contract must be upheld by the caller.
131	unsafe { self::convert::from_u32_unchecked(i) }
132	}
133
134	/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
135	#[stable(feature = "rust1", since = "1.0.0")]
136	#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
137	#[must_use]
138	#[inline]
139	pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
140	self::convert::from_digit(num, radix)
141	}
142
143	/// Returns an iterator that yields the hexadecimal Unicode escape of a
144	/// character, as `char`s.
145	///
146	/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
147	/// its documentation for more.
148	///
149	/// [`escape_unicode`]: char::escape_unicode
150	#[derive(Clone, Debug)]
151	#[stable(feature = "rust1", since = "1.0.0")]
152	pub struct EscapeUnicode(escape::EscapeIterInner<`10`>);
153
154	impl EscapeUnicode {
155	fn new(chr: char) -> Self {
156	let mut data: [AsciiChar; 10] = [ascii::Char::Null; `10`];
157	let range: Range = escape::escape_unicode_into(&mut data, ch:chr);
158	Self(escape::EscapeIterInner::new(data, alive:range))
159	}
160	}
161
162	#[stable(feature = "rust1", since = "1.0.0")]
163	impl Iterator for EscapeUnicode {
164	type Item = char;
165
166	#[inline]
167	fn next(&mut self) -> Option<char> {
168	self.0.next().map(char::from)
169	}
170
171	#[inline]
172	fn size_hint(&self) -> (usize, Option<usize>) {
173	let n = self.0.len();
174	(n, Some(n))
175	}
176
177	#[inline]
178	fn count(self) -> usize {
179	self.0.len()
180	}
181
182	#[inline]
183	fn last(mut self) -> Option<char> {
184	self.0.next_back().map(char::from)
185	}
186
187	#[inline]
188	fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
189	self.0.advance_by(n)
190	}
191	}
192
193	#[stable(feature = "exact_size_escape", since = "1.11.0")]
194	impl ExactSizeIterator for EscapeUnicode {
195	#[inline]
196	fn len(&self) -> usize {
197	self.0.len()
198	}
199	}
200
201	#[stable(feature = "fused", since = "1.26.0")]
202	impl FusedIterator for EscapeUnicode {}
203
204	#[stable(feature = "char_struct_display", since = "1.16.0")]
205	impl fmt::Display for EscapeUnicode {
206	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
207	f.write_str(self.0.as_str())
208	}
209	}
210
211	/// An iterator that yields the literal escape code of a `char`.
212	///
213	/// This `struct` is created by the [`escape_default`] method on [`char`]. See
214	/// its documentation for more.
215	///
216	/// [`escape_default`]: char::escape_default
217	#[derive(Clone, Debug)]
218	#[stable(feature = "rust1", since = "1.0.0")]
219	pub struct EscapeDefault(escape::EscapeIterInner<`10`>);
220
221	impl EscapeDefault {
222	fn printable(chr: ascii::Char) -> Self {
223	let data: [AsciiChar; 1] = [chr];
224	Self(escape::EscapeIterInner::from_array(data))
225	}
226
227	fn backslash(chr: ascii::Char) -> Self {
228	let data: [AsciiChar; 2] = [ascii::Char::ReverseSolidus, chr];
229	Self(escape::EscapeIterInner::from_array(data))
230	}
231
232	fn from_unicode(esc: EscapeUnicode) -> Self {
233	Self(esc.0)
234	}
235	}
236
237	#[stable(feature = "rust1", since = "1.0.0")]
238	impl Iterator for EscapeDefault {
239	type Item = char;
240
241	#[inline]
242	fn next(&mut self) -> Option<char> {
243	self.0.next().map(char::from)
244	}
245
246	#[inline]
247	fn size_hint(&self) -> (usize, Option<usize>) {
248	let n = self.0.len();
249	(n, Some(n))
250	}
251
252	#[inline]
253	fn count(self) -> usize {
254	self.0.len()
255	}
256
257	#[inline]
258	fn last(mut self) -> Option<char> {
259	self.0.next_back().map(char::from)
260	}
261
262	#[inline]
263	fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
264	self.0.advance_by(n)
265	}
266	}
267
268	#[stable(feature = "exact_size_escape", since = "1.11.0")]
269	impl ExactSizeIterator for EscapeDefault {
270	#[inline]
271	fn len(&self) -> usize {
272	self.0.len()
273	}
274	}
275
276	#[stable(feature = "fused", since = "1.26.0")]
277	impl FusedIterator for EscapeDefault {}
278
279	#[stable(feature = "char_struct_display", since = "1.16.0")]
280	impl fmt::Display for EscapeDefault {
281	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282	f.write_str(self.0.as_str())
283	}
284	}
285
286	/// An iterator that yields the literal escape code of a `char`.
287	///
288	/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
289	/// documentation for more.
290	///
291	/// [`escape_debug`]: char::escape_debug
292	#[stable(feature = "char_escape_debug", since = "1.20.0")]
293	#[derive(Clone, Debug)]
294	pub struct EscapeDebug(EscapeDebugInner);
295
296	#[derive(Clone, Debug)]
297	// Note: It’s possible to manually encode the EscapeDebugInner inside of
298	// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
299	// a char) which would likely result in a more optimised code. For now we use
300	// the option easier to implement.
301	enum EscapeDebugInner {
302	Bytes(escape::EscapeIterInner<`10`>),
303	Char(char),
304	}
305
306	impl EscapeDebug {
307	fn printable(chr: char) -> Self {
308	Self(EscapeDebugInner::Char(chr))
309	}
310
311	fn backslash(chr: ascii::Char) -> Self {
312	let data: [AsciiChar; 2] = [ascii::Char::ReverseSolidus, chr];
313	let iter: EscapeIterInner<10> = escape::EscapeIterInner::from_array(data);
314	Self(EscapeDebugInner::Bytes(iter))
315	}
316
317	fn from_unicode(esc: EscapeUnicode) -> Self {
318	Self(EscapeDebugInner::Bytes(esc.0))
319	}
320
321	fn clear(&mut self) {
322	let bytes: EscapeIterInner<10> = escape::EscapeIterInner::from_array([]);
323	self.0 = EscapeDebugInner::Bytes(bytes);
324	}
325	}
326
327	#[stable(feature = "char_escape_debug", since = "1.20.0")]
328	impl Iterator for EscapeDebug {
329	type Item = char;
330
331	#[inline]
332	fn next(&mut self) -> Option<char> {
333	match self.0 {
334	EscapeDebugInner::Bytes(ref mut bytes: &mut EscapeIterInner<10>) => bytes.next().map(char::from),
335	EscapeDebugInner::Char(chr: char) => {
336	self.clear();
337	Some(chr)
338	}
339	}
340	}
341
342	fn size_hint(&self) -> (usize, Option<usize>) {
343	let n: usize = self.len();
344	(n, Some(n))
345	}
346
347	#[inline]
348	fn count(self) -> usize {
349	self.len()
350	}
351	}
352
353	#[stable(feature = "char_escape_debug", since = "1.20.0")]
354	impl ExactSizeIterator for EscapeDebug {
355	fn len(&self) -> usize {
356	match &self.0 {
357	EscapeDebugInner::Bytes(bytes: &EscapeIterInner<10>) => bytes.len(),
358	EscapeDebugInner::Char(_) => `1`,
359	}
360	}
361	}
362
363	#[stable(feature = "fused", since = "1.26.0")]
364	impl FusedIterator for EscapeDebug {}
365
366	#[stable(feature = "char_escape_debug", since = "1.20.0")]
367	impl fmt::Display for EscapeDebug {
368	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
369	match &self.0 {
370	EscapeDebugInner::Bytes(bytes: &EscapeIterInner<10>) => f.write_str(data:bytes.as_str()),
371	EscapeDebugInner::Char(chr: &char) => f.write_char(*chr),
372	}
373	}
374	}
375
// Generates a public newtype around `CaseMappingIter` named `$ITER_NAME`,
// carrying the attributes (typically doc comments) given before the name, and
// implements the iterator traits and `Display` for it by delegating every
// method to the wrapped `CaseMappingIter`.
macro_rules! casemappingiter_impls {
    ($(#[$attr:meta])* $ITER_NAME:ident) => {
        $(#[$attr])*
        #[stable(feature = "rust1", since = "1.0.0")]
        #[derive(Debug, Clone)]
        pub struct $ITER_NAME(CaseMappingIter);

        #[stable(feature = "rust1", since = "1.0.0")]
        impl Iterator for $ITER_NAME {
            type Item = char;
            fn next(&mut self) -> Option<char> {
                self.0.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                self.0.size_hint()
            }

            fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
            where
                Fold: FnMut(Acc, Self::Item) -> Acc,
            {
                self.0.fold(init, fold)
            }

            fn count(self) -> usize {
                self.0.count()
            }

            fn last(self) -> Option<Self::Item> {
                self.0.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: just forwarding requirements to caller
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
        impl DoubleEndedIterator for $ITER_NAME {
            fn next_back(&mut self) -> Option<char> {
                self.0.next_back()
            }

            fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
            where
                Fold: FnMut(Acc, Self::Item) -> Acc,
            {
                self.0.rfold(init, rfold)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_back_by(n)
            }
        }

        #[stable(feature = "fused", since = "1.26.0")]
        impl FusedIterator for $ITER_NAME {}

        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
        impl ExactSizeIterator for $ITER_NAME {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn is_empty(&self) -> bool {
                self.0.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $ITER_NAME {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $ITER_NAME {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $ITER_NAME {}

        #[stable(feature = "char_struct_display", since = "1.16.0")]
        impl fmt::Display for $ITER_NAME {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }
    }
}
475
476	casemappingiter_impls! {
477	/// Returns an iterator that yields the lowercase equivalent of a `char`.
478	///
479	/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
480	/// its documentation for more.
481	///
482	/// [`to_lowercase`]: char::to_lowercase
483	ToLowercase
484	}
485
486	casemappingiter_impls! {
487	/// Returns an iterator that yields the uppercase equivalent of a `char`.
488	///
489	/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
490	/// its documentation for more.
491	///
492	/// [`to_uppercase`]: char::to_uppercase
493	ToUppercase
494	}
495
/// Owning iterator over at most three `char`s.
#[derive(Debug, Clone)]
struct CaseMappingIter(core::array::IntoIter<char, 3>);

impl CaseMappingIter {
    /// Builds the iterator from a three-slot array.
    ///
    /// Trailing `'\0'` entries in slots 1 and 2 are treated as unused and are
    /// trimmed from the back; slot 0 is always yielded, even when it is `'\0'`.
    #[inline]
    fn new(chars: [char; 3]) -> CaseMappingIter {
        // Fully qualified so the by-value array iterator is selected
        // independently of the edition's method-resolution rules for arrays.
        let mut iter = IntoIterator::into_iter(chars);
        if chars[2] == '\0' {
            iter.next_back();
            if chars[1] == '\0' {
                iter.next_back();

                // Deliberately don't check `chars[0]`,
                // as '\0' lowercases to itself
            }
        }
        CaseMappingIter(iter)
    }
}
515
516	impl Iterator for CaseMappingIter {
517	type Item = char;
518
519	fn next(&mut self) -> Option<char> {
520	self.0.next()
521	}
522
523	fn size_hint(&self) -> (usize, Option<usize>) {
524	self.0.size_hint()
525	}
526
527	fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
528	where
529	Fold: FnMut(Acc, Self::Item) -> Acc,
530	{
531	self.0.fold(init, fold)
532	}
533
534	fn count(self) -> usize {
535	self.0.count()
536	}
537
538	fn last(self) -> Option<Self::Item> {
539	self.0.last()
540	}
541
542	fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
543	self.0.advance_by(n)
544	}
545
546	unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
547	// SAFETY: just forwarding requirements to caller
548	unsafe { self.0.__iterator_get_unchecked(idx) }
549	}
550	}
551
552	impl DoubleEndedIterator for CaseMappingIter {
553	fn next_back(&mut self) -> Option<char> {
554	self.0.next_back()
555	}
556
557	fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
558	where
559	Fold: FnMut(Acc, Self::Item) -> Acc,
560	{
561	self.0.rfold(init, f:rfold)
562	}
563
564	fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
565	self.0.advance_back_by(n)
566	}
567	}
568
569	impl ExactSizeIterator for CaseMappingIter {
570	fn len(&self) -> usize {
571	self.0.len()
572	}
573
574	fn is_empty(&self) -> bool {
575	self.0.is_empty()
576	}
577	}
578
579	impl FusedIterator for CaseMappingIter {}
580
581	// SAFETY: forwards to inner `array::IntoIter`
582	unsafe impl TrustedLen for CaseMappingIter {}
583
584	// SAFETY: forwards to inner `array::IntoIter`
585	unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
586	const MAY_HAVE_SIDE_EFFECT: bool = `false`;
587	}
588
589	// SAFETY: `CaseMappingIter` has no subtypes/supertypes
590	unsafe impl TrustedRandomAccess for CaseMappingIter {}
591
592	impl fmt::Display for CaseMappingIter {
593	#[inline]
594	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
595	for c: char in self.0.clone() {
596	f.write_char(c)?;
597	}
598	Ok(())
599	}
600	}
601
602	/// The error type returned when a checked char conversion fails.
603	#[stable(feature = "u8_from_char", since = "1.59.0")]
604	#[derive(Debug, Copy, Clone, PartialEq, Eq)]
605	pub struct TryFromCharError(pub(crate) ());
606
607	#[stable(feature = "u8_from_char", since = "1.59.0")]
608	impl fmt::Display for TryFromCharError {
609	fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
610	"unicode code point out of range".fmt(fmt)
611	}
612	}
613
614	#[stable(feature = "u8_from_char", since = "1.59.0")]
615	impl Error for TryFromCharError {}
616