1//! Utilities for the `char` primitive type.
2//!
3//! *[See also the `char` primitive type](primitive@char).*
4//!
5//! The `char` type represents a single character. More specifically, since
6//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8//! point]'.
9//!
10//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
15//!
16//! This module is the home of the iterator implementations for the iterators
17//! implemented on `char`, as well as some useful constants and conversion
18//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "core_char", since = "1.2.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
28#[stable(feature = "try_from", since = "1.34.0")]
29pub use self::convert::CharTryFromError;
30#[stable(feature = "char_from_str", since = "1.20.0")]
31pub use self::convert::ParseCharError;
32#[stable(feature = "decode_utf16", since = "1.9.0")]
33pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
34
35// perma-unstable re-exports
36#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
37pub use self::methods::encode_utf16_raw;
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf8_raw;
40
41use crate::ascii;
42use crate::error::Error;
43use crate::escape;
44use crate::fmt::{self, Write};
45use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
46use crate::num::NonZero;
47
48pub(crate) use self::methods::EscapeDebugExtArgs;
49
// UTF-8 ranges and tags for encoding characters.
//
// Tags: the fixed high bits of each kind of byte in a UTF-8 sequence.
const TAG_CONT: u8 = 0b1000_0000; // 10xx_xxxx: continuation byte
const TAG_TWO_B: u8 = 0b1100_0000; // 110x_xxxx: lead byte of a 2-byte sequence
const TAG_THREE_B: u8 = 0b1110_0000; // 1110_xxxx: lead byte of a 3-byte sequence
const TAG_FOUR_B: u8 = 0b1111_0000; // 1111_0xxx: lead byte of a 4-byte sequence
// Ranges: the first code point that no longer fits in one, two or three bytes
// (7, 11 and 16 payload bits respectively).
const MAX_ONE_B: u32 = 1 << 7;
const MAX_TWO_B: u32 = 1 << 11;
const MAX_THREE_B: u32 = 1 << 16;
58
59/*
60 Lu Uppercase_Letter an uppercase letter
61 Ll Lowercase_Letter a lowercase letter
62 Lt Titlecase_Letter a digraphic character, with first part uppercase
63 Lm Modifier_Letter a modifier letter
64 Lo Other_Letter other letters, including syllables and ideographs
65 Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
66 Mc Spacing_Mark a spacing combining mark (positive advance width)
67 Me Enclosing_Mark an enclosing combining mark
68 Nd Decimal_Number a decimal digit
69 Nl Letter_Number a letterlike numeric character
70 No Other_Number a numeric character of other type
71 Pc Connector_Punctuation a connecting punctuation mark, like a tie
72 Pd Dash_Punctuation a dash or hyphen punctuation mark
73 Ps Open_Punctuation an opening punctuation mark (of a pair)
74 Pe Close_Punctuation a closing punctuation mark (of a pair)
75 Pi Initial_Punctuation an initial quotation mark
76 Pf Final_Punctuation a final quotation mark
77 Po Other_Punctuation a punctuation mark of other type
78 Sm Math_Symbol a symbol of primarily mathematical use
79 Sc Currency_Symbol a currency sign
80 Sk Modifier_Symbol a non-letterlike modifier symbol
81 So Other_Symbol a symbol of other type
82 Zs Space_Separator a space character (of various non-zero widths)
83 Zl Line_Separator U+2028 LINE SEPARATOR only
84 Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
85 Cc Control a C0 or C1 control code
86 Cf Format a format control character
87 Cs Surrogate a surrogate code point
88 Co Private_Use a private-use character
89 Cn Unassigned a reserved unassigned code point or a noncharacter
90*/
91
/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
///
/// This module-level constant is a plain alias of the associated constant
/// [`char::MAX`].
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;
95
/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
///
/// This module-level constant is a plain alias of the associated constant
/// [`char::REPLACEMENT_CHARACTER`].
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
100
/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
///
/// This module-level constant is a plain alias of the associated constant
/// [`char::UNICODE_VERSION`]; the value is a three-element `(u8, u8, u8)` tuple.
#[stable(feature = "unicode_version", since = "1.45.0")]
pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
105
106/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
107/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
108#[stable(feature = "decode_utf16", since = "1.9.0")]
109#[inline]
110pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
111 self::decode::decode_utf16(iter)
112}
113
114/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
115#[stable(feature = "rust1", since = "1.0.0")]
116#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
117#[must_use]
118#[inline]
119pub const fn from_u32(i: u32) -> Option<char> {
120 self::convert::from_u32(i)
121}
122
123/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`].
124/// instead.
125#[stable(feature = "char_from_unchecked", since = "1.5.0")]
126#[rustc_const_unstable(feature = "const_char_from_u32_unchecked", issue = "89259")]
127#[must_use]
128#[inline]
129pub const unsafe fn from_u32_unchecked(i: u32) -> char {
130 // SAFETY: the safety contract must be upheld by the caller.
131 unsafe { self::convert::from_u32_unchecked(i) }
132}
133
134/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
135#[stable(feature = "rust1", since = "1.0.0")]
136#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
137#[must_use]
138#[inline]
139pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
140 self::convert::from_digit(num, radix)
141}
142
143/// Returns an iterator that yields the hexadecimal Unicode escape of a
144/// character, as `char`s.
145///
146/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
147/// its documentation for more.
148///
149/// [`escape_unicode`]: char::escape_unicode
150#[derive(Clone, Debug)]
151#[stable(feature = "rust1", since = "1.0.0")]
152pub struct EscapeUnicode(escape::EscapeIterInner<10>);
153
154impl EscapeUnicode {
155 fn new(chr: char) -> Self {
156 let mut data: [AsciiChar; 10] = [ascii::Char::Null; 10];
157 let range: Range = escape::escape_unicode_into(&mut data, ch:chr);
158 Self(escape::EscapeIterInner::new(data, alive:range))
159 }
160}
161
162#[stable(feature = "rust1", since = "1.0.0")]
163impl Iterator for EscapeUnicode {
164 type Item = char;
165
166 #[inline]
167 fn next(&mut self) -> Option<char> {
168 self.0.next().map(char::from)
169 }
170
171 #[inline]
172 fn size_hint(&self) -> (usize, Option<usize>) {
173 let n = self.0.len();
174 (n, Some(n))
175 }
176
177 #[inline]
178 fn count(self) -> usize {
179 self.0.len()
180 }
181
182 #[inline]
183 fn last(mut self) -> Option<char> {
184 self.0.next_back().map(char::from)
185 }
186
187 #[inline]
188 fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
189 self.0.advance_by(n)
190 }
191}
192
193#[stable(feature = "exact_size_escape", since = "1.11.0")]
194impl ExactSizeIterator for EscapeUnicode {
195 #[inline]
196 fn len(&self) -> usize {
197 self.0.len()
198 }
199}
200
201#[stable(feature = "fused", since = "1.26.0")]
202impl FusedIterator for EscapeUnicode {}
203
204#[stable(feature = "char_struct_display", since = "1.16.0")]
205impl fmt::Display for EscapeUnicode {
206 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
207 f.write_str(self.0.as_str())
208 }
209}
210
211/// An iterator that yields the literal escape code of a `char`.
212///
213/// This `struct` is created by the [`escape_default`] method on [`char`]. See
214/// its documentation for more.
215///
216/// [`escape_default`]: char::escape_default
217#[derive(Clone, Debug)]
218#[stable(feature = "rust1", since = "1.0.0")]
219pub struct EscapeDefault(escape::EscapeIterInner<10>);
220
221impl EscapeDefault {
222 fn printable(chr: ascii::Char) -> Self {
223 let data: [AsciiChar; 1] = [chr];
224 Self(escape::EscapeIterInner::from_array(data))
225 }
226
227 fn backslash(chr: ascii::Char) -> Self {
228 let data: [AsciiChar; 2] = [ascii::Char::ReverseSolidus, chr];
229 Self(escape::EscapeIterInner::from_array(data))
230 }
231
232 fn from_unicode(esc: EscapeUnicode) -> Self {
233 Self(esc.0)
234 }
235}
236
237#[stable(feature = "rust1", since = "1.0.0")]
238impl Iterator for EscapeDefault {
239 type Item = char;
240
241 #[inline]
242 fn next(&mut self) -> Option<char> {
243 self.0.next().map(char::from)
244 }
245
246 #[inline]
247 fn size_hint(&self) -> (usize, Option<usize>) {
248 let n = self.0.len();
249 (n, Some(n))
250 }
251
252 #[inline]
253 fn count(self) -> usize {
254 self.0.len()
255 }
256
257 #[inline]
258 fn last(mut self) -> Option<char> {
259 self.0.next_back().map(char::from)
260 }
261
262 #[inline]
263 fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
264 self.0.advance_by(n)
265 }
266}
267
268#[stable(feature = "exact_size_escape", since = "1.11.0")]
269impl ExactSizeIterator for EscapeDefault {
270 #[inline]
271 fn len(&self) -> usize {
272 self.0.len()
273 }
274}
275
276#[stable(feature = "fused", since = "1.26.0")]
277impl FusedIterator for EscapeDefault {}
278
279#[stable(feature = "char_struct_display", since = "1.16.0")]
280impl fmt::Display for EscapeDefault {
281 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282 f.write_str(self.0.as_str())
283 }
284}
285
286/// An iterator that yields the literal escape code of a `char`.
287///
288/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
289/// documentation for more.
290///
291/// [`escape_debug`]: char::escape_debug
292#[stable(feature = "char_escape_debug", since = "1.20.0")]
293#[derive(Clone, Debug)]
294pub struct EscapeDebug(EscapeDebugInner);
295
296#[derive(Clone, Debug)]
297// Note: It’s possible to manually encode the EscapeDebugInner inside of
298// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
299// a char) which would likely result in a more optimised code. For now we use
300// the option easier to implement.
301enum EscapeDebugInner {
302 Bytes(escape::EscapeIterInner<10>),
303 Char(char),
304}
305
306impl EscapeDebug {
307 fn printable(chr: char) -> Self {
308 Self(EscapeDebugInner::Char(chr))
309 }
310
311 fn backslash(chr: ascii::Char) -> Self {
312 let data: [AsciiChar; 2] = [ascii::Char::ReverseSolidus, chr];
313 let iter: EscapeIterInner<10> = escape::EscapeIterInner::from_array(data);
314 Self(EscapeDebugInner::Bytes(iter))
315 }
316
317 fn from_unicode(esc: EscapeUnicode) -> Self {
318 Self(EscapeDebugInner::Bytes(esc.0))
319 }
320
321 fn clear(&mut self) {
322 let bytes: EscapeIterInner<10> = escape::EscapeIterInner::from_array([]);
323 self.0 = EscapeDebugInner::Bytes(bytes);
324 }
325}
326
327#[stable(feature = "char_escape_debug", since = "1.20.0")]
328impl Iterator for EscapeDebug {
329 type Item = char;
330
331 #[inline]
332 fn next(&mut self) -> Option<char> {
333 match self.0 {
334 EscapeDebugInner::Bytes(ref mut bytes: &mut EscapeIterInner<10>) => bytes.next().map(char::from),
335 EscapeDebugInner::Char(chr: char) => {
336 self.clear();
337 Some(chr)
338 }
339 }
340 }
341
342 fn size_hint(&self) -> (usize, Option<usize>) {
343 let n: usize = self.len();
344 (n, Some(n))
345 }
346
347 #[inline]
348 fn count(self) -> usize {
349 self.len()
350 }
351}
352
353#[stable(feature = "char_escape_debug", since = "1.20.0")]
354impl ExactSizeIterator for EscapeDebug {
355 fn len(&self) -> usize {
356 match &self.0 {
357 EscapeDebugInner::Bytes(bytes: &EscapeIterInner<10>) => bytes.len(),
358 EscapeDebugInner::Char(_) => 1,
359 }
360 }
361}
362
363#[stable(feature = "fused", since = "1.26.0")]
364impl FusedIterator for EscapeDebug {}
365
366#[stable(feature = "char_escape_debug", since = "1.20.0")]
367impl fmt::Display for EscapeDebug {
368 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
369 match &self.0 {
370 EscapeDebugInner::Bytes(bytes: &EscapeIterInner<10>) => f.write_str(data:bytes.as_str()),
371 EscapeDebugInner::Char(chr: &char) => f.write_char(*chr),
372 }
373 }
374}
375
// Generates a public newtype around `CaseMappingIter` together with all of its
// iterator-related trait impls. Every method simply forwards to the wrapped
// `CaseMappingIter`; any attributes (including doc comments) written before
// the name are attached to the generated struct.
macro_rules! casemappingiter_impls {
    ($(#[$attr:meta])* $ITER_NAME:ident) => {
        $(#[$attr])*
        #[stable(feature = "rust1", since = "1.0.0")]
        #[derive(Debug, Clone)]
        pub struct $ITER_NAME(CaseMappingIter);

        #[stable(feature = "rust1", since = "1.0.0")]
        impl Iterator for $ITER_NAME {
            type Item = char;

            fn next(&mut self) -> Option<Self::Item> {
                self.0.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                self.0.size_hint()
            }

            fn fold<B, F>(self, init: B, f: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                self.0.fold(init, f)
            }

            fn count(self) -> usize {
                self.0.count()
            }

            fn last(self) -> Option<Self::Item> {
                self.0.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: just forwarding requirements to caller
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
        impl DoubleEndedIterator for $ITER_NAME {
            fn next_back(&mut self) -> Option<Self::Item> {
                self.0.next_back()
            }

            fn rfold<B, F>(self, init: B, f: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                self.0.rfold(init, f)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_back_by(n)
            }
        }

        #[stable(feature = "fused", since = "1.26.0")]
        impl FusedIterator for $ITER_NAME {}

        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
        impl ExactSizeIterator for $ITER_NAME {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn is_empty(&self) -> bool {
                self.0.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $ITER_NAME {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $ITER_NAME {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $ITER_NAME {}

        #[stable(feature = "char_struct_display", since = "1.16.0")]
        impl fmt::Display for $ITER_NAME {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Formatting is shared with the wrapped iterator.
                fmt::Display::fmt(&self.0, f)
            }
        }
    }
}
475
// Expands to the public `ToLowercase` newtype and its iterator impls.
casemappingiter_impls! {
    /// An iterator that yields the lowercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_lowercase`]: char::to_lowercase
    ToLowercase
}
485
// Expands to the public `ToUppercase` newtype and its iterator impls.
casemappingiter_impls! {
    /// An iterator that yields the uppercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_uppercase`]: char::to_uppercase
    ToUppercase
}
495
// Shared implementation behind the generated case-mapping iterators: a
// by-value array iterator over at most three `char`s.
#[derive(Debug, Clone)]
struct CaseMappingIter(core::array::IntoIter<char, 3>);

impl CaseMappingIter {
    /// Wraps `chars`, dropping trailing `'\0'` entries from the back.
    ///
    /// Slot 2 is dropped when it is `'\0'`, and slot 1 is dropped only if
    /// slot 2 was dropped as well. Slot 0 is always kept.
    #[inline]
    fn new(chars: [char; 3]) -> CaseMappingIter {
        // `[char; 3]` is `Copy`, so `chars` stays readable after this call.
        let mut iter = IntoIterator::into_iter(chars);
        if chars[2] == '\0' {
            iter.next_back();
            if chars[1] == '\0' {
                iter.next_back();

                // Deliberately don't check `chars[0]`,
                // as '\0' lowercases to itself
            }
        }
        CaseMappingIter(iter)
    }
}
515
516impl Iterator for CaseMappingIter {
517 type Item = char;
518
519 fn next(&mut self) -> Option<char> {
520 self.0.next()
521 }
522
523 fn size_hint(&self) -> (usize, Option<usize>) {
524 self.0.size_hint()
525 }
526
527 fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
528 where
529 Fold: FnMut(Acc, Self::Item) -> Acc,
530 {
531 self.0.fold(init, fold)
532 }
533
534 fn count(self) -> usize {
535 self.0.count()
536 }
537
538 fn last(self) -> Option<Self::Item> {
539 self.0.last()
540 }
541
542 fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
543 self.0.advance_by(n)
544 }
545
546 unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
547 // SAFETY: just forwarding requirements to caller
548 unsafe { self.0.__iterator_get_unchecked(idx) }
549 }
550}
551
552impl DoubleEndedIterator for CaseMappingIter {
553 fn next_back(&mut self) -> Option<char> {
554 self.0.next_back()
555 }
556
557 fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
558 where
559 Fold: FnMut(Acc, Self::Item) -> Acc,
560 {
561 self.0.rfold(init, f:rfold)
562 }
563
564 fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
565 self.0.advance_back_by(n)
566 }
567}
568
569impl ExactSizeIterator for CaseMappingIter {
570 fn len(&self) -> usize {
571 self.0.len()
572 }
573
574 fn is_empty(&self) -> bool {
575 self.0.is_empty()
576 }
577}
578
579impl FusedIterator for CaseMappingIter {}
580
581// SAFETY: forwards to inner `array::IntoIter`
582unsafe impl TrustedLen for CaseMappingIter {}
583
584// SAFETY: forwards to inner `array::IntoIter`
585unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
586 const MAY_HAVE_SIDE_EFFECT: bool = false;
587}
588
589// SAFETY: `CaseMappingIter` has no subtypes/supertypes
590unsafe impl TrustedRandomAccess for CaseMappingIter {}
591
592impl fmt::Display for CaseMappingIter {
593 #[inline]
594 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
595 for c: char in self.0.clone() {
596 f.write_char(c)?;
597 }
598 Ok(())
599 }
600}
601
602/// The error type returned when a checked char conversion fails.
603#[stable(feature = "u8_from_char", since = "1.59.0")]
604#[derive(Debug, Copy, Clone, PartialEq, Eq)]
605pub struct TryFromCharError(pub(crate) ());
606
607#[stable(feature = "u8_from_char", since = "1.59.0")]
608impl fmt::Display for TryFromCharError {
609 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
610 "unicode code point out of range".fmt(fmt)
611 }
612}
613
614#[stable(feature = "u8_from_char", since = "1.59.0")]
615impl Error for TryFromCharError {}
616