mod.rs source code [crates/core/src/char/mod.rs]

1	//! Utilities for the `char` primitive type.
2	//!
3	//! *[See also the `char` primitive type](primitive@char).*
4	//!
5	//! The `char` type represents a single character. More specifically, since
6	//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7	//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8	//! point]'.
9	//!
10	//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11	//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12	//!
13	//! This module exists for technical reasons, the primary documentation for
14	//! `char` is directly on [the `char` primitive type][char] itself.
15	//!
16	//! This module is the home of the iterator implementations for the iterators
17	//! implemented on `char`, as well as some useful constants and conversion
18	//! functions that convert various types to `char`.
19
20	#![allow(non_snake_case)]
21	#![stable(feature = "rust1", since = "1.0.0")]
22
23	mod convert;
24	mod decode;
25	mod methods;
26
27	// stable re-exports
28	#[rustfmt::skip]
29	#[stable(feature = "try_from", since = "1.34.0")]
30	pub use self::convert::CharTryFromError;
31	#[stable(feature = "char_from_str", since = "1.20.0")]
32	pub use self::convert::ParseCharError;
33	#[stable(feature = "decode_utf16", since = "1.9.0")]
34	pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
35
36	// perma-unstable re-exports
37	#[rustfmt::skip]
38	#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39	pub use self::methods::encode_utf16_raw; // perma-unstable
40	#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41	pub use self::methods::encode_utf8_raw; // perma-unstable
42
43	#[rustfmt::skip]
44	use crate::ascii;
45	pub(crate) use self::methods::EscapeDebugExtArgs;
46	use crate::error::Error;
47	use crate::escape;
48	use crate::fmt::{self, Write};
49	use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
50	use crate::num::NonZero;
51
// UTF-8 ranges and tags for encoding characters.
//
// The `TAG_*` values are the fixed high bits of UTF-8 bytes; the `MAX_*_B`
// values are the exclusive upper bounds of the code point ranges that fit in
// one, two and three bytes respectively.

/// High bits (`10xx_xxxx`) that mark a UTF-8 continuation byte.
const TAG_CONT: u8 = 0b1000_0000;
/// High bits (`110x_xxxx`) of the leading byte of a two-byte sequence.
const TAG_TWO_B: u8 = 0b1100_0000;
/// High bits (`1110_xxxx`) of the leading byte of a three-byte sequence.
const TAG_THREE_B: u8 = 0b1110_0000;
/// High bits (`1111_0xxx`) of the leading byte of a four-byte sequence.
const TAG_FOUR_B: u8 = 0b1111_0000;
/// Code points strictly below this value fit in one UTF-8 byte.
const MAX_ONE_B: u32 = 0x80;
/// Code points strictly below this value fit in at most two UTF-8 bytes.
const MAX_TWO_B: u32 = 0x800;
/// Code points strictly below this value fit in at most three UTF-8 bytes.
const MAX_THREE_B: u32 = 0x10000;
60
61	/*
62	Lu Uppercase_Letter an uppercase letter
63	Ll Lowercase_Letter a lowercase letter
64	Lt Titlecase_Letter a digraphic character, with first part uppercase
65	Lm Modifier_Letter a modifier letter
66	Lo Other_Letter other letters, including syllables and ideographs
67	Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
68	Mc Spacing_Mark a spacing combining mark (positive advance width)
69	Me Enclosing_Mark an enclosing combining mark
70	Nd Decimal_Number a decimal digit
71	Nl Letter_Number a letterlike numeric character
72	No Other_Number a numeric character of other type
73	Pc Connector_Punctuation a connecting punctuation mark, like a tie
74	Pd Dash_Punctuation a dash or hyphen punctuation mark
75	Ps Open_Punctuation an opening punctuation mark (of a pair)
76	Pe Close_Punctuation a closing punctuation mark (of a pair)
77	Pi Initial_Punctuation an initial quotation mark
78	Pf Final_Punctuation a final quotation mark
79	Po Other_Punctuation a punctuation mark of other type
80	Sm Math_Symbol a symbol of primarily mathematical use
81	Sc Currency_Symbol a currency sign
82	Sk Modifier_Symbol a non-letterlike modifier symbol
83	So Other_Symbol a symbol of other type
84	Zs Space_Separator a space character (of various non-zero widths)
85	Zl Line_Separator U+2028 LINE SEPARATOR only
86	Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
87	Cc Control a C0 or C1 control code
88	Cf Format a format control character
89	Cs Surrogate a surrogate code point
90	Co Private_Use a private-use character
91	Cn Unassigned a reserved unassigned code point or a noncharacter
92	*/
93
/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
///
/// This module-level constant is defined as the associated constant itself.
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;

/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding. Use [`char::MAX_LEN_UTF8`] instead.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;

/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding. Use [`char::MAX_LEN_UTF16`] instead.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;

/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
///
/// The tuple has three `u8` components.
// NOTE(review): presumably ordered (major, minor, update) — confirm against
// the documentation of `char::UNICODE_VERSION`.
#[stable(feature = "unicode_version", since = "1.45.0")]
pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
117
/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
///
/// `iter` may be anything that converts into an iterator of `u16` code units.
/// This free function only forwards it to the implementation in the private
/// `decode` submodule and returns the resulting [`DecodeUtf16`] adapter.
#[stable(feature = "decode_utf16", since = "1.9.0")]
#[inline]
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
    self::decode::decode_utf16(iter)
}
125
/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
///
/// The result is `None` when the conversion fails; see [`char::from_u32`] for
/// the exact rules. This free function only forwards `i` to the implementation
/// in the private `convert` submodule.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
#[must_use]
#[inline]
pub const fn from_u32(i: u32) -> Option<char> {
    self::convert::from_u32(i)
}
134
/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
/// instead.
///
/// # Safety
///
/// No validity check is performed here: the call is forwarded unchanged to the
/// implementation in the private `convert` submodule, so the caller must uphold
/// the safety contract documented on [`char::from_u32_unchecked`].
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
#[must_use]
#[inline]
pub const unsafe fn from_u32_unchecked(i: u32) -> char {
    // SAFETY: the safety contract must be upheld by the caller.
    unsafe { self::convert::from_u32_unchecked(i) }
}
145
/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
///
/// `num` is the digit value and `radix` the base it is expressed in. The result
/// is `None` when the conversion fails; see [`char::from_digit`] for the exact
/// rules. This free function only forwards both arguments to the
/// implementation in the private `convert` submodule.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
#[must_use]
#[inline]
pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
    self::convert::from_digit(num, radix)
}
154
155	/// Returns an iterator that yields the hexadecimal Unicode escape of a
156	/// character, as `char`s.
157	///
158	/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
159	/// its documentation for more.
160	///
161	/// [`escape_unicode`]: char::escape_unicode
162	#[derive(Clone, Debug)]
163	#[stable(feature = "rust1", since = "1.0.0")]
164	pub struct EscapeUnicode(escape::EscapeIterInner<`10`>);
165
166	impl EscapeUnicode {
167	#[inline]
168	const fn new(c: char) -> Self {
169	Self(escape::EscapeIterInner::unicode(c))
170	}
171	}
172
173	#[stable(feature = "rust1", since = "1.0.0")]
174	impl Iterator for EscapeUnicode {
175	type Item = char;
176
177	#[inline]
178	fn next(&mut self) -> Option<char> {
179	self.0.next().map(char::from)
180	}
181
182	#[inline]
183	fn size_hint(&self) -> (usize, Option<usize>) {
184	let n = self.0.len();
185	(n, Some(n))
186	}
187
188	#[inline]
189	fn count(self) -> usize {
190	self.0.len()
191	}
192
193	#[inline]
194	fn last(mut self) -> Option<char> {
195	self.0.next_back().map(char::from)
196	}
197
198	#[inline]
199	fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
200	self.0.advance_by(n)
201	}
202	}
203
204	#[stable(feature = "exact_size_escape", since = "1.11.0")]
205	impl ExactSizeIterator for EscapeUnicode {
206	#[inline]
207	fn len(&self) -> usize {
208	self.0.len()
209	}
210	}
211
212	#[stable(feature = "fused", since = "1.26.0")]
213	impl FusedIterator for EscapeUnicode {}
214
215	#[stable(feature = "char_struct_display", since = "1.16.0")]
216	impl fmt::Display for EscapeUnicode {
217	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218	f.write_str(self.0.as_str())
219	}
220	}
221
222	/// An iterator that yields the literal escape code of a `char`.
223	///
224	/// This `struct` is created by the [`escape_default`] method on [`char`]. See
225	/// its documentation for more.
226	///
227	/// [`escape_default`]: char::escape_default
228	#[derive(Clone, Debug)]
229	#[stable(feature = "rust1", since = "1.0.0")]
230	pub struct EscapeDefault(escape::EscapeIterInner<`10`>);
231
232	impl EscapeDefault {
233	#[inline]
234	const fn printable(c: ascii::Char) -> Self {
235	Self(escape::EscapeIterInner::ascii(c.to_u8()))
236	}
237
238	#[inline]
239	const fn backslash(c: ascii::Char) -> Self {
240	Self(escape::EscapeIterInner::backslash(c))
241	}
242
243	#[inline]
244	const fn unicode(c: char) -> Self {
245	Self(escape::EscapeIterInner::unicode(c))
246	}
247	}
248
249	#[stable(feature = "rust1", since = "1.0.0")]
250	impl Iterator for EscapeDefault {
251	type Item = char;
252
253	#[inline]
254	fn next(&mut self) -> Option<char> {
255	self.0.next().map(char::from)
256	}
257
258	#[inline]
259	fn size_hint(&self) -> (usize, Option<usize>) {
260	let n = self.0.len();
261	(n, Some(n))
262	}
263
264	#[inline]
265	fn count(self) -> usize {
266	self.0.len()
267	}
268
269	#[inline]
270	fn last(mut self) -> Option<char> {
271	self.0.next_back().map(char::from)
272	}
273
274	#[inline]
275	fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
276	self.0.advance_by(n)
277	}
278	}
279
280	#[stable(feature = "exact_size_escape", since = "1.11.0")]
281	impl ExactSizeIterator for EscapeDefault {
282	#[inline]
283	fn len(&self) -> usize {
284	self.0.len()
285	}
286	}
287
288	#[stable(feature = "fused", since = "1.26.0")]
289	impl FusedIterator for EscapeDefault {}
290
291	#[stable(feature = "char_struct_display", since = "1.16.0")]
292	impl fmt::Display for EscapeDefault {
293	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
294	f.write_str(self.0.as_str())
295	}
296	}
297
298	/// An iterator that yields the literal escape code of a `char`.
299	///
300	/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
301	/// documentation for more.
302	///
303	/// [`escape_debug`]: char::escape_debug
304	#[stable(feature = "char_escape_debug", since = "1.20.0")]
305	#[derive(Clone, Debug)]
306	pub struct EscapeDebug(EscapeDebugInner);
307
308	#[derive(Clone, Debug)]
309	// Note: It’s possible to manually encode the EscapeDebugInner inside of
310	// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
311	// a char) which would likely result in a more optimised code. For now we use
312	// the option easier to implement.
313	enum EscapeDebugInner {
314	Bytes(escape::EscapeIterInner<`10`>),
315	Char(char),
316	}
317
318	impl EscapeDebug {
319	#[inline]
320	const fn printable(chr: char) -> Self {
321	Self(EscapeDebugInner::Char(chr))
322	}
323
324	#[inline]
325	const fn backslash(c: ascii::Char) -> Self {
326	Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::backslash(c)))
327	}
328
329	#[inline]
330	const fn unicode(c: char) -> Self {
331	Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::unicode(c)))
332	}
333
334	#[inline]
335	fn clear(&mut self) {
336	self.0 = EscapeDebugInner::Bytes(escape::EscapeIterInner::empty());
337	}
338	}
339
340	#[stable(feature = "char_escape_debug", since = "1.20.0")]
341	impl Iterator for EscapeDebug {
342	type Item = char;
343
344	#[inline]
345	fn next(&mut self) -> Option<char> {
346	match self.0 {
347	EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from),
348	EscapeDebugInner::Char(chr) => {
349	self.clear();
350	Some(chr)
351	}
352	}
353	}
354
355	#[inline]
356	fn size_hint(&self) -> (usize, Option<usize>) {
357	let n = self.len();
358	(n, Some(n))
359	}
360
361	#[inline]
362	fn count(self) -> usize {
363	self.len()
364	}
365	}
366
367	#[stable(feature = "char_escape_debug", since = "1.20.0")]
368	impl ExactSizeIterator for EscapeDebug {
369	fn len(&self) -> usize {
370	match &self.0 {
371	EscapeDebugInner::Bytes(bytes: &EscapeIterInner<10>) => bytes.len(),
372	EscapeDebugInner::Char(_) => `1`,
373	}
374	}
375	}
376
377	#[stable(feature = "fused", since = "1.26.0")]
378	impl FusedIterator for EscapeDebug {}
379
380	#[stable(feature = "char_escape_debug", since = "1.20.0")]
381	impl fmt::Display for EscapeDebug {
382	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
383	match &self.0 {
384	EscapeDebugInner::Bytes(bytes: &EscapeIterInner<10>) => f.write_str(data:bytes.as_str()),
385	EscapeDebugInner::Char(chr: &char) => f.write_char(*chr),
386	}
387	}
388	}
389
// Generates a public newtype `$ITER_NAME` around `CaseMappingIter`, carrying
// the attributes (typically doc comments) given before the name, together with
// the iterator-related trait impls and `Display`. Every generated method
// simply forwards to the wrapped value in field `0`.
macro_rules! casemappingiter_impls {
    ($(#[$attr:meta])* $ITER_NAME:ident) => {
        $(#[$attr])*
        #[stable(feature = "rust1", since = "1.0.0")]
        #[derive(Debug, Clone)]
        pub struct $ITER_NAME(CaseMappingIter);

        #[stable(feature = "rust1", since = "1.0.0")]
        impl Iterator for $ITER_NAME {
            type Item = char;
            fn next(&mut self) -> Option<char> {
                self.0.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                self.0.size_hint()
            }

            fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
            where
                Fold: FnMut(Acc, Self::Item) -> Acc,
            {
                self.0.fold(init, fold)
            }

            fn count(self) -> usize {
                self.0.count()
            }

            fn last(self) -> Option<Self::Item> {
                self.0.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: just forwarding requirements to caller
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
        impl DoubleEndedIterator for $ITER_NAME {
            fn next_back(&mut self) -> Option<char> {
                self.0.next_back()
            }

            fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
            where
                Fold: FnMut(Acc, Self::Item) -> Acc,
            {
                self.0.rfold(init, rfold)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_back_by(n)
            }
        }

        #[stable(feature = "fused", since = "1.26.0")]
        impl FusedIterator for $ITER_NAME {}

        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
        impl ExactSizeIterator for $ITER_NAME {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn is_empty(&self) -> bool {
                self.0.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $ITER_NAME {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $ITER_NAME {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $ITER_NAME {}

        #[stable(feature = "char_struct_display", since = "1.16.0")]
        impl fmt::Display for $ITER_NAME {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Explicit trait path: formats the wrapped iterator via its
                // own `Display` impl.
                fmt::Display::fmt(&self.0, f)
            }
        }
    }
}
489
// Expands to `pub struct ToLowercase` plus its forwarding trait impls; the doc
// comment below is matched by the macro's `$(#[$attr:meta])*` fragment and
// re-emitted on the generated struct.
casemappingiter_impls! {
    /// An iterator that yields the lowercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_lowercase`]: char::to_lowercase
    ToLowercase
}
499
// Expands to `pub struct ToUppercase` plus its forwarding trait impls; the doc
// comment below is matched by the macro's `$(#[$attr:meta])*` fragment and
// re-emitted on the generated struct.
casemappingiter_impls! {
    /// An iterator that yields the uppercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_uppercase`]: char::to_uppercase
    ToUppercase
}
509
510	#[derive(Debug, Clone)]
511	struct CaseMappingIter(core::array::IntoIter<char, `3`>);
512
513	impl CaseMappingIter {
514	#[inline]
515	fn new(chars: [char; `3`]) -> CaseMappingIter {
516	let mut iter: IntoIter = chars.into_iter();
517	if chars[`2`] == '`\0`' {
518	iter.next_back();
519	if chars[`1`] == '`\0`' {
520	iter.next_back();
521
522	// Deliberately don't check `chars[0]`,
523	// as '\0' lowercases to itself
524	}
525	}
526	CaseMappingIter(iter)
527	}
528	}
529
530	impl Iterator for CaseMappingIter {
531	type Item = char;
532
533	fn next(&mut self) -> Option<char> {
534	self.0.next()
535	}
536
537	fn size_hint(&self) -> (usize, Option<usize>) {
538	self.0.size_hint()
539	}
540
541	fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
542	where
543	Fold: FnMut(Acc, Self::Item) -> Acc,
544	{
545	self.0.fold(init, fold)
546	}
547
548	fn count(self) -> usize {
549	self.0.count()
550	}
551
552	fn last(self) -> Option<Self::Item> {
553	self.0.last()
554	}
555
556	fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
557	self.0.advance_by(n)
558	}
559
560	unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
561	// SAFETY: just forwarding requirements to caller
562	unsafe { self.0.__iterator_get_unchecked(idx) }
563	}
564	}
565
566	impl DoubleEndedIterator for CaseMappingIter {
567	fn next_back(&mut self) -> Option<char> {
568	self.0.next_back()
569	}
570
571	fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
572	where
573	Fold: FnMut(Acc, Self::Item) -> Acc,
574	{
575	self.0.rfold(init, f:rfold)
576	}
577
578	fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
579	self.0.advance_back_by(n)
580	}
581	}
582
583	impl ExactSizeIterator for CaseMappingIter {
584	fn len(&self) -> usize {
585	self.0.len()
586	}
587
588	fn is_empty(&self) -> bool {
589	self.0.is_empty()
590	}
591	}
592
593	impl FusedIterator for CaseMappingIter {}
594
595	// SAFETY: forwards to inner `array::IntoIter`
596	unsafe impl TrustedLen for CaseMappingIter {}
597
598	// SAFETY: forwards to inner `array::IntoIter`
599	unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
600	const MAY_HAVE_SIDE_EFFECT: bool = `false`;
601	}
602
603	// SAFETY: `CaseMappingIter` has no subtypes/supertypes
604	unsafe impl TrustedRandomAccess for CaseMappingIter {}
605
606	impl fmt::Display for CaseMappingIter {
607	#[inline]
608	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
609	for c: char in self.0.clone() {
610	f.write_char(c)?;
611	}
612	Ok(())
613	}
614	}
615
616	/// The error type returned when a checked char conversion fails.
617	#[stable(feature = "u8_from_char", since = "1.59.0")]
618	#[derive(Debug, Copy, Clone, PartialEq, Eq)]
619	pub struct TryFromCharError(pub(crate) ());
620
621	#[stable(feature = "u8_from_char", since = "1.59.0")]
622	impl fmt::Display for TryFromCharError {
623	fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
624	"unicode code point out of range".fmt(fmt)
625	}
626	}
627
628	#[stable(feature = "u8_from_char", since = "1.59.0")]
629	impl Error for TryFromCharError {}
630

Provided by KDAB

Definitions