1//! Utilities for the `char` primitive type.
2//!
3//! *[See also the `char` primitive type](primitive@char).*
4//!
5//! The `char` type represents a single character. More specifically, since
6//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8//! point]'.
9//!
10//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
15//!
16//! This module is the home of the iterator implementations for the iterators
17//! implemented on `char`, as well as some useful constants and conversion
18//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "rust1", since = "1.0.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
28#[rustfmt::skip]
29#[stable(feature = "try_from", since = "1.34.0")]
30pub use self::convert::CharTryFromError;
31#[stable(feature = "char_from_str", since = "1.20.0")]
32pub use self::convert::ParseCharError;
33#[stable(feature = "decode_utf16", since = "1.9.0")]
34pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
35
36// perma-unstable re-exports
37#[rustfmt::skip]
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf16_raw; // perma-unstable
40#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-unstable
42
43#[rustfmt::skip]
44use crate::ascii;
45pub(crate) use self::methods::EscapeDebugExtArgs;
46use crate::error::Error;
47use crate::escape::{AlwaysEscaped, EscapeIterInner, MaybeEscaped};
48use crate::fmt::{self, Write};
49use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
50use crate::num::NonZero;
51
// UTF-8 byte tags and per-length code point limits for encoding characters.

/// Tag bits of a UTF-8 continuation byte (`10xx_xxxx`).
const TAG_CONT: u8 = 0b1000_0000;
/// Tag bits of the leading byte of a two-byte UTF-8 sequence (`110x_xxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
/// Tag bits of the leading byte of a three-byte UTF-8 sequence (`1110_xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
/// Tag bits of the leading byte of a four-byte UTF-8 sequence (`1111_0xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;
/// First code point value that no longer fits in one UTF-8 byte (7 payload bits).
const MAX_ONE_B: u32 = 1 << 7;
/// First code point value that no longer fits in two UTF-8 bytes (11 payload bits).
const MAX_TWO_B: u32 = 1 << 11;
/// First code point value that no longer fits in three UTF-8 bytes (16 payload bits).
const MAX_THREE_B: u32 = 1 << 16;
60
61/*
62 Lu Uppercase_Letter an uppercase letter
63 Ll Lowercase_Letter a lowercase letter
64 Lt Titlecase_Letter a digraphic character, with first part uppercase
65 Lm Modifier_Letter a modifier letter
66 Lo Other_Letter other letters, including syllables and ideographs
67 Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
68 Mc Spacing_Mark a spacing combining mark (positive advance width)
69 Me Enclosing_Mark an enclosing combining mark
70 Nd Decimal_Number a decimal digit
71 Nl Letter_Number a letterlike numeric character
72 No Other_Number a numeric character of other type
73 Pc Connector_Punctuation a connecting punctuation mark, like a tie
74 Pd Dash_Punctuation a dash or hyphen punctuation mark
75 Ps Open_Punctuation an opening punctuation mark (of a pair)
76 Pe Close_Punctuation a closing punctuation mark (of a pair)
77 Pi Initial_Punctuation an initial quotation mark
78 Pf Final_Punctuation a final quotation mark
79 Po Other_Punctuation a punctuation mark of other type
80 Sm Math_Symbol a symbol of primarily mathematical use
81 Sc Currency_Symbol a currency sign
82 Sk Modifier_Symbol a non-letterlike modifier symbol
83 So Other_Symbol a symbol of other type
84 Zs Space_Separator a space character (of various non-zero widths)
85 Zl Line_Separator U+2028 LINE SEPARATOR only
86 Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
87 Cc Control a C0 or C1 control code
88 Cf Format a format control character
89 Cs Surrogate a surrogate code point
90 Co Private_Use a private-use character
91 Cn Unassigned a reserved unassigned code point or a noncharacter
92*/
93
94/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
95#[stable(feature = "rust1", since = "1.0.0")]
96pub const MAX: char = char::MAX;
97
98/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
99/// UTF-8 encoding.
100#[unstable(feature = "char_max_len", issue = "121714")]
101pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;
102
103/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
104/// to UTF-16 encoding.
105#[unstable(feature = "char_max_len", issue = "121714")]
106pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;
107
108/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
109/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
110#[stable(feature = "decode_utf16", since = "1.9.0")]
111pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
112
113/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
114/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
115#[stable(feature = "unicode_version", since = "1.45.0")]
116pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
117
118/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
119/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
120#[stable(feature = "decode_utf16", since = "1.9.0")]
121#[inline]
122pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
123 self::decode::decode_utf16(iter)
124}
125
126/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
127#[stable(feature = "rust1", since = "1.0.0")]
128#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
129#[must_use]
130#[inline]
131pub const fn from_u32(i: u32) -> Option<char> {
132 self::convert::from_u32(i)
133}
134
135/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
136/// instead.
137#[stable(feature = "char_from_unchecked", since = "1.5.0")]
138#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
139#[must_use]
140#[inline]
141pub const unsafe fn from_u32_unchecked(i: u32) -> char {
142 // SAFETY: the safety contract must be upheld by the caller.
143 unsafe { self::convert::from_u32_unchecked(i) }
144}
145
146/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
147#[stable(feature = "rust1", since = "1.0.0")]
148#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
149#[must_use]
150#[inline]
151pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
152 self::convert::from_digit(num, radix)
153}
154
155/// Returns an iterator that yields the hexadecimal Unicode escape of a
156/// character, as `char`s.
157///
158/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
159/// its documentation for more.
160///
161/// [`escape_unicode`]: char::escape_unicode
162#[derive(Clone, Debug)]
163#[stable(feature = "rust1", since = "1.0.0")]
164pub struct EscapeUnicode(EscapeIterInner<10, AlwaysEscaped>);
165
166impl EscapeUnicode {
167 #[inline]
168 const fn new(c: char) -> Self {
169 Self(EscapeIterInner::unicode(c))
170 }
171}
172
173#[stable(feature = "rust1", since = "1.0.0")]
174impl Iterator for EscapeUnicode {
175 type Item = char;
176
177 #[inline]
178 fn next(&mut self) -> Option<char> {
179 self.0.next().map(char::from)
180 }
181
182 #[inline]
183 fn size_hint(&self) -> (usize, Option<usize>) {
184 let n = self.0.len();
185 (n, Some(n))
186 }
187
188 #[inline]
189 fn count(self) -> usize {
190 self.0.len()
191 }
192
193 #[inline]
194 fn last(mut self) -> Option<char> {
195 self.0.next_back().map(char::from)
196 }
197
198 #[inline]
199 fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
200 self.0.advance_by(n)
201 }
202}
203
204#[stable(feature = "exact_size_escape", since = "1.11.0")]
205impl ExactSizeIterator for EscapeUnicode {
206 #[inline]
207 fn len(&self) -> usize {
208 self.0.len()
209 }
210}
211
212#[stable(feature = "fused", since = "1.26.0")]
213impl FusedIterator for EscapeUnicode {}
214
215#[stable(feature = "char_struct_display", since = "1.16.0")]
216impl fmt::Display for EscapeUnicode {
217 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218 fmt::Display::fmt(&self.0, f)
219 }
220}
221
222/// An iterator that yields the literal escape code of a `char`.
223///
224/// This `struct` is created by the [`escape_default`] method on [`char`]. See
225/// its documentation for more.
226///
227/// [`escape_default`]: char::escape_default
228#[derive(Clone, Debug)]
229#[stable(feature = "rust1", since = "1.0.0")]
230pub struct EscapeDefault(EscapeIterInner<10, AlwaysEscaped>);
231
232impl EscapeDefault {
233 #[inline]
234 const fn printable(c: ascii::Char) -> Self {
235 Self(EscapeIterInner::ascii(c.to_u8()))
236 }
237
238 #[inline]
239 const fn backslash(c: ascii::Char) -> Self {
240 Self(EscapeIterInner::backslash(c))
241 }
242
243 #[inline]
244 const fn unicode(c: char) -> Self {
245 Self(EscapeIterInner::unicode(c))
246 }
247}
248
249#[stable(feature = "rust1", since = "1.0.0")]
250impl Iterator for EscapeDefault {
251 type Item = char;
252
253 #[inline]
254 fn next(&mut self) -> Option<char> {
255 self.0.next().map(char::from)
256 }
257
258 #[inline]
259 fn size_hint(&self) -> (usize, Option<usize>) {
260 let n = self.0.len();
261 (n, Some(n))
262 }
263
264 #[inline]
265 fn count(self) -> usize {
266 self.0.len()
267 }
268
269 #[inline]
270 fn last(mut self) -> Option<char> {
271 self.0.next_back().map(char::from)
272 }
273
274 #[inline]
275 fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
276 self.0.advance_by(n)
277 }
278}
279
280#[stable(feature = "exact_size_escape", since = "1.11.0")]
281impl ExactSizeIterator for EscapeDefault {
282 #[inline]
283 fn len(&self) -> usize {
284 self.0.len()
285 }
286}
287
288#[stable(feature = "fused", since = "1.26.0")]
289impl FusedIterator for EscapeDefault {}
290
291#[stable(feature = "char_struct_display", since = "1.16.0")]
292impl fmt::Display for EscapeDefault {
293 #[inline]
294 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
295 fmt::Display::fmt(&self.0, f)
296 }
297}
298
299/// An iterator that yields the literal escape code of a `char`.
300///
301/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
302/// documentation for more.
303///
304/// [`escape_debug`]: char::escape_debug
305#[stable(feature = "char_escape_debug", since = "1.20.0")]
306#[derive(Clone, Debug)]
307pub struct EscapeDebug(EscapeIterInner<10, MaybeEscaped>);
308
309impl EscapeDebug {
310 #[inline]
311 const fn printable(chr: char) -> Self {
312 Self(EscapeIterInner::printable(chr))
313 }
314
315 #[inline]
316 const fn backslash(c: ascii::Char) -> Self {
317 Self(EscapeIterInner::backslash(c))
318 }
319
320 #[inline]
321 const fn unicode(c: char) -> Self {
322 Self(EscapeIterInner::unicode(c))
323 }
324}
325
326#[stable(feature = "char_escape_debug", since = "1.20.0")]
327impl Iterator for EscapeDebug {
328 type Item = char;
329
330 #[inline]
331 fn next(&mut self) -> Option<char> {
332 self.0.next()
333 }
334
335 #[inline]
336 fn size_hint(&self) -> (usize, Option<usize>) {
337 let n: usize = self.len();
338 (n, Some(n))
339 }
340
341 #[inline]
342 fn count(self) -> usize {
343 self.len()
344 }
345}
346
347#[stable(feature = "char_escape_debug", since = "1.20.0")]
348impl ExactSizeIterator for EscapeDebug {
349 fn len(&self) -> usize {
350 self.0.len()
351 }
352}
353
354#[stable(feature = "fused", since = "1.26.0")]
355impl FusedIterator for EscapeDebug {}
356
357#[stable(feature = "char_escape_debug", since = "1.20.0")]
358impl fmt::Display for EscapeDebug {
359 #[inline]
360 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
361 fmt::Display::fmt(&self.0, f)
362 }
363}
364
// Generates a public newtype `$Iter` around `CaseMappingIter`, carrying the
// attributes passed by the caller, plus the iterator, marker and formatting
// impls. Every generated method delegates to the wrapped `CaseMappingIter`.
macro_rules! casemappingiter_impls {
    ($(#[$meta:meta])* $Iter:ident) => {
        $(#[$meta])*
        #[stable(feature = "rust1", since = "1.0.0")]
        #[derive(Debug, Clone)]
        pub struct $Iter(CaseMappingIter);

        #[stable(feature = "rust1", since = "1.0.0")]
        impl Iterator for $Iter {
            type Item = char;

            fn next(&mut self) -> Option<char> {
                let Self(inner) = self;
                inner.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                let Self(inner) = self;
                inner.size_hint()
            }

            fn fold<B, F>(self, init: B, f: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                let Self(inner) = self;
                inner.fold(init, f)
            }

            fn count(self) -> usize {
                let Self(inner) = self;
                inner.count()
            }

            fn last(self) -> Option<Self::Item> {
                let Self(inner) = self;
                inner.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                let Self(inner) = self;
                inner.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: the caller's obligations are passed through unchanged.
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
        impl DoubleEndedIterator for $Iter {
            fn next_back(&mut self) -> Option<char> {
                let Self(inner) = self;
                inner.next_back()
            }

            fn rfold<B, F>(self, init: B, f: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                let Self(inner) = self;
                inner.rfold(init, f)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                let Self(inner) = self;
                inner.advance_back_by(n)
            }
        }

        #[stable(feature = "fused", since = "1.26.0")]
        impl FusedIterator for $Iter {}

        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
        impl ExactSizeIterator for $Iter {
            fn len(&self) -> usize {
                let Self(inner) = self;
                inner.len()
            }

            fn is_empty(&self) -> bool {
                let Self(inner) = self;
                inner.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $Iter {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $Iter {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $Iter {}

        #[stable(feature = "char_struct_display", since = "1.16.0")]
        impl fmt::Display for $Iter {
            #[inline]
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(inner) = self;
                fmt::Display::fmt(inner, f)
            }
        }
    }
}
465
466casemappingiter_impls! {
467 /// Returns an iterator that yields the lowercase equivalent of a `char`.
468 ///
469 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
470 /// its documentation for more.
471 ///
472 /// [`to_lowercase`]: char::to_lowercase
473 ToLowercase
474}
475
476casemappingiter_impls! {
477 /// Returns an iterator that yields the uppercase equivalent of a `char`.
478 ///
479 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
480 /// its documentation for more.
481 ///
482 /// [`to_uppercase`]: char::to_uppercase
483 ToUppercase
484}
485
/// Backing iterator shared by the case-mapping iterators.
///
/// Wraps a by-value iterator over a fixed three-slot `char` buffer.
#[derive(Debug, Clone)]
struct CaseMappingIter(core::array::IntoIter<char, 3>);

impl CaseMappingIter {
    /// Builds the iterator from a three-slot buffer, dropping `'\0'` padding
    /// from the back.
    ///
    /// Slot 2 is dropped when it holds `'\0'`, and slot 1 is then dropped too
    /// when it also holds `'\0'`. Slot 0 is always yielded, even when it is
    /// `'\0'`. A `'\0'` in slot 1 is kept when slot 2 is not `'\0'`.
    #[inline]
    fn new(chars: [char; 3]) -> CaseMappingIter {
        // Call the trait function explicitly so the array is iterated by value;
        // `chars` is `Copy`, so it stays usable for the checks below.
        let mut iter = IntoIterator::into_iter(chars);
        if chars[2] == '\0' {
            iter.next_back();
            if chars[1] == '\0' {
                iter.next_back();

                // Deliberately don't check `chars[0]`,
                // as '\0' lowercases to itself
            }
        }
        CaseMappingIter(iter)
    }
}
505
506impl Iterator for CaseMappingIter {
507 type Item = char;
508
509 fn next(&mut self) -> Option<char> {
510 self.0.next()
511 }
512
513 fn size_hint(&self) -> (usize, Option<usize>) {
514 self.0.size_hint()
515 }
516
517 fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
518 where
519 Fold: FnMut(Acc, Self::Item) -> Acc,
520 {
521 self.0.fold(init, fold)
522 }
523
524 fn count(self) -> usize {
525 self.0.count()
526 }
527
528 fn last(self) -> Option<Self::Item> {
529 self.0.last()
530 }
531
532 fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
533 self.0.advance_by(n)
534 }
535
536 unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
537 // SAFETY: just forwarding requirements to caller
538 unsafe { self.0.__iterator_get_unchecked(idx) }
539 }
540}
541
542impl DoubleEndedIterator for CaseMappingIter {
543 fn next_back(&mut self) -> Option<char> {
544 self.0.next_back()
545 }
546
547 fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
548 where
549 Fold: FnMut(Acc, Self::Item) -> Acc,
550 {
551 self.0.rfold(init, f:rfold)
552 }
553
554 fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
555 self.0.advance_back_by(n)
556 }
557}
558
559impl ExactSizeIterator for CaseMappingIter {
560 fn len(&self) -> usize {
561 self.0.len()
562 }
563
564 fn is_empty(&self) -> bool {
565 self.0.is_empty()
566 }
567}
568
569impl FusedIterator for CaseMappingIter {}
570
571// SAFETY: forwards to inner `array::IntoIter`
572unsafe impl TrustedLen for CaseMappingIter {}
573
574// SAFETY: forwards to inner `array::IntoIter`
575unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
576 const MAY_HAVE_SIDE_EFFECT: bool = false;
577}
578
579// SAFETY: `CaseMappingIter` has no subtypes/supertypes
580unsafe impl TrustedRandomAccess for CaseMappingIter {}
581
582impl fmt::Display for CaseMappingIter {
583 #[inline]
584 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
585 for c: char in self.0.clone() {
586 f.write_char(c)?;
587 }
588 Ok(())
589 }
590}
591
592/// The error type returned when a checked char conversion fails.
593#[stable(feature = "u8_from_char", since = "1.59.0")]
594#[derive(Debug, Copy, Clone, PartialEq, Eq)]
595pub struct TryFromCharError(pub(crate) ());
596
597#[stable(feature = "u8_from_char", since = "1.59.0")]
598impl fmt::Display for TryFromCharError {
599 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
600 "unicode code point out of range".fmt(fmt)
601 }
602}
603
604#[stable(feature = "u8_from_char", since = "1.59.0")]
605impl Error for TryFromCharError {}
606