1//! Utilities for the `char` primitive type.
2//!
3//! *[See also the `char` primitive type](primitive@char).*
4//!
5//! The `char` type represents a single character. More specifically, since
6//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8//! point]'.
9//!
10//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
15//!
16//! This module is the home of the iterator implementations for the iterators
17//! implemented on `char`, as well as some useful constants and conversion
18//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "core_char", since = "1.2.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
28#[stable(feature = "try_from", since = "1.34.0")]
29pub use self::convert::CharTryFromError;
30#[stable(feature = "char_from_str", since = "1.20.0")]
31pub use self::convert::ParseCharError;
32#[stable(feature = "decode_utf16", since = "1.9.0")]
33pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
34
35// perma-unstable re-exports
36#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
37pub use self::methods::encode_utf16_raw;
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf8_raw;
40
41use crate::ascii;
42use crate::error::Error;
43use crate::escape;
44use crate::fmt::{self, Write};
45use crate::iter::FusedIterator;
46use crate::num::NonZeroUsize;
47
48pub(crate) use self::methods::EscapeDebugExtArgs;
49
// UTF-8 encoding parameters: the tag bits placed in the high bits of encoded
// bytes, and the code point limits that separate the encoded lengths.

// Tag carried by every continuation byte (`10xx_xxxx`).
const TAG_CONT: u8 = 0b1000_0000;
// Tag carried by the leading byte of a two-byte sequence (`110x_xxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
// Tag carried by the leading byte of a three-byte sequence (`1110_xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
// Tag carried by the leading byte of a four-byte sequence (`1111_0xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;

// Code point limit for the one-byte form (U+0080).
const MAX_ONE_B: u32 = 0x80;
// Code point limit for the two-byte form (U+0800).
const MAX_TWO_B: u32 = 0x800;
// Code point limit for the three-byte form (U+10000).
const MAX_THREE_B: u32 = 0x1_0000;
58
59/*
60 Lu Uppercase_Letter an uppercase letter
61 Ll Lowercase_Letter a lowercase letter
62 Lt Titlecase_Letter a digraphic character, with first part uppercase
63 Lm Modifier_Letter a modifier letter
64 Lo Other_Letter other letters, including syllables and ideographs
65 Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
66 Mc Spacing_Mark a spacing combining mark (positive advance width)
67 Me Enclosing_Mark an enclosing combining mark
68 Nd Decimal_Number a decimal digit
69 Nl Letter_Number a letterlike numeric character
70 No Other_Number a numeric character of other type
71 Pc Connector_Punctuation a connecting punctuation mark, like a tie
72 Pd Dash_Punctuation a dash or hyphen punctuation mark
73 Ps Open_Punctuation an opening punctuation mark (of a pair)
74 Pe Close_Punctuation a closing punctuation mark (of a pair)
75 Pi Initial_Punctuation an initial quotation mark
76 Pf Final_Punctuation a final quotation mark
77 Po Other_Punctuation a punctuation mark of other type
78 Sm Math_Symbol a symbol of primarily mathematical use
79 Sc Currency_Symbol a currency sign
80 Sk Modifier_Symbol a non-letterlike modifier symbol
81 So Other_Symbol a symbol of other type
82 Zs Space_Separator a space character (of various non-zero widths)
83 Zl Line_Separator U+2028 LINE SEPARATOR only
84 Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
85 Cc Control a C0 or C1 control code
86 Cf Format a format control character
87 Cs Surrogate a surrogate code point
88 Co Private_Use a private-use character
89 Cn Unassigned a reserved unassigned code point or a noncharacter
90*/
91
/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
///
/// This module-level constant is defined directly in terms of the associated
/// constant [`char::MAX`].
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
///
/// This module-level constant is defined directly in terms of the associated
/// constant [`char::REPLACEMENT_CHARACTER`].
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;

/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
///
/// The value is a three-component `(u8, u8, u8)` tuple taken from
/// [`char::UNICODE_VERSION`].
#[stable(feature = "unicode_version", since = "1.45.0")]
pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
105
106/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
107/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
108#[stable(feature = "decode_utf16", since = "1.9.0")]
109#[inline]
110pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
111 self::decode::decode_utf16(iter)
112}
113
114/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
115#[stable(feature = "rust1", since = "1.0.0")]
116#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
117#[must_use]
118#[inline]
119pub const fn from_u32(i: u32) -> Option<char> {
120 self::convert::from_u32(i)
121}
122
/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
/// instead.
///
/// # Safety
///
/// This wrapper performs no validity check on `i` itself; the caller must
/// uphold the safety contract documented on [`char::from_u32_unchecked`].
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
#[rustc_const_unstable(feature = "const_char_from_u32_unchecked", issue = "89259")]
#[must_use]
#[inline]
pub const unsafe fn from_u32_unchecked(i: u32) -> char {
    // SAFETY: the safety contract must be upheld by the caller.
    unsafe { self::convert::from_u32_unchecked(i) }
}
133
134/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
135#[stable(feature = "rust1", since = "1.0.0")]
136#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
137#[must_use]
138#[inline]
139pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
140 self::convert::from_digit(num, radix)
141}
142
143/// Returns an iterator that yields the hexadecimal Unicode escape of a
144/// character, as `char`s.
145///
146/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
147/// its documentation for more.
148///
149/// [`escape_unicode`]: char::escape_unicode
150#[derive(Clone, Debug)]
151#[stable(feature = "rust1", since = "1.0.0")]
152pub struct EscapeUnicode(escape::EscapeIterInner<10>);
153
154impl EscapeUnicode {
155 fn new(chr: char) -> Self {
156 let mut data: [AsciiChar; 10] = [ascii::Char::Null; 10];
157 let range: Range = escape::escape_unicode_into(&mut data, ch:chr);
158 Self(escape::EscapeIterInner::new(data, alive:range))
159 }
160}
161
162#[stable(feature = "rust1", since = "1.0.0")]
163impl Iterator for EscapeUnicode {
164 type Item = char;
165
166 #[inline]
167 fn next(&mut self) -> Option<char> {
168 self.0.next().map(char::from)
169 }
170
171 #[inline]
172 fn size_hint(&self) -> (usize, Option<usize>) {
173 let n = self.0.len();
174 (n, Some(n))
175 }
176
177 #[inline]
178 fn count(self) -> usize {
179 self.0.len()
180 }
181
182 #[inline]
183 fn last(mut self) -> Option<char> {
184 self.0.next_back().map(char::from)
185 }
186
187 #[inline]
188 fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
189 self.0.advance_by(n)
190 }
191}
192
193#[stable(feature = "exact_size_escape", since = "1.11.0")]
194impl ExactSizeIterator for EscapeUnicode {
195 #[inline]
196 fn len(&self) -> usize {
197 self.0.len()
198 }
199}
200
201#[stable(feature = "fused", since = "1.26.0")]
202impl FusedIterator for EscapeUnicode {}
203
204#[stable(feature = "char_struct_display", since = "1.16.0")]
205impl fmt::Display for EscapeUnicode {
206 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
207 f.write_str(self.0.as_str())
208 }
209}
210
211/// An iterator that yields the literal escape code of a `char`.
212///
213/// This `struct` is created by the [`escape_default`] method on [`char`]. See
214/// its documentation for more.
215///
216/// [`escape_default`]: char::escape_default
217#[derive(Clone, Debug)]
218#[stable(feature = "rust1", since = "1.0.0")]
219pub struct EscapeDefault(escape::EscapeIterInner<10>);
220
221impl EscapeDefault {
222 fn printable(chr: ascii::Char) -> Self {
223 let data: [AsciiChar; 1] = [chr];
224 Self(escape::EscapeIterInner::from_array(data))
225 }
226
227 fn backslash(chr: ascii::Char) -> Self {
228 let data: [AsciiChar; 2] = [ascii::Char::ReverseSolidus, chr];
229 Self(escape::EscapeIterInner::from_array(data))
230 }
231
232 fn from_unicode(esc: EscapeUnicode) -> Self {
233 Self(esc.0)
234 }
235}
236
237#[stable(feature = "rust1", since = "1.0.0")]
238impl Iterator for EscapeDefault {
239 type Item = char;
240
241 #[inline]
242 fn next(&mut self) -> Option<char> {
243 self.0.next().map(char::from)
244 }
245
246 #[inline]
247 fn size_hint(&self) -> (usize, Option<usize>) {
248 let n = self.0.len();
249 (n, Some(n))
250 }
251
252 #[inline]
253 fn count(self) -> usize {
254 self.0.len()
255 }
256
257 #[inline]
258 fn last(mut self) -> Option<char> {
259 self.0.next_back().map(char::from)
260 }
261
262 #[inline]
263 fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
264 self.0.advance_by(n)
265 }
266}
267
268#[stable(feature = "exact_size_escape", since = "1.11.0")]
269impl ExactSizeIterator for EscapeDefault {
270 #[inline]
271 fn len(&self) -> usize {
272 self.0.len()
273 }
274}
275
276#[stable(feature = "fused", since = "1.26.0")]
277impl FusedIterator for EscapeDefault {}
278
279#[stable(feature = "char_struct_display", since = "1.16.0")]
280impl fmt::Display for EscapeDefault {
281 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282 f.write_str(self.0.as_str())
283 }
284}
285
286/// An iterator that yields the literal escape code of a `char`.
287///
288/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
289/// documentation for more.
290///
291/// [`escape_debug`]: char::escape_debug
292#[stable(feature = "char_escape_debug", since = "1.20.0")]
293#[derive(Clone, Debug)]
294pub struct EscapeDebug(EscapeDebugInner);
295
296#[derive(Clone, Debug)]
297// Note: It’s possible to manually encode the EscapeDebugInner inside of
298// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
299// a char) which would likely result in a more optimised code. For now we use
300// the option easier to implement.
301enum EscapeDebugInner {
302 Bytes(escape::EscapeIterInner<10>),
303 Char(char),
304}
305
306impl EscapeDebug {
307 fn printable(chr: char) -> Self {
308 Self(EscapeDebugInner::Char(chr))
309 }
310
311 fn backslash(chr: ascii::Char) -> Self {
312 let data: [AsciiChar; 2] = [ascii::Char::ReverseSolidus, chr];
313 let iter: EscapeIterInner<10> = escape::EscapeIterInner::from_array(data);
314 Self(EscapeDebugInner::Bytes(iter))
315 }
316
317 fn from_unicode(esc: EscapeUnicode) -> Self {
318 Self(EscapeDebugInner::Bytes(esc.0))
319 }
320
321 fn clear(&mut self) {
322 let bytes: EscapeIterInner<10> = escape::EscapeIterInner::from_array([]);
323 self.0 = EscapeDebugInner::Bytes(bytes);
324 }
325}
326
327#[stable(feature = "char_escape_debug", since = "1.20.0")]
328impl Iterator for EscapeDebug {
329 type Item = char;
330
331 #[inline]
332 fn next(&mut self) -> Option<char> {
333 match self.0 {
334 EscapeDebugInner::Bytes(ref mut bytes: &mut EscapeIterInner<10>) => bytes.next().map(char::from),
335 EscapeDebugInner::Char(chr: char) => {
336 self.clear();
337 Some(chr)
338 }
339 }
340 }
341
342 fn size_hint(&self) -> (usize, Option<usize>) {
343 let n: usize = self.len();
344 (n, Some(n))
345 }
346
347 #[inline]
348 fn count(self) -> usize {
349 self.len()
350 }
351}
352
353#[stable(feature = "char_escape_debug", since = "1.20.0")]
354impl ExactSizeIterator for EscapeDebug {
355 fn len(&self) -> usize {
356 match &self.0 {
357 EscapeDebugInner::Bytes(bytes: &EscapeIterInner<10>) => bytes.len(),
358 EscapeDebugInner::Char(_) => 1,
359 }
360 }
361}
362
363#[stable(feature = "fused", since = "1.26.0")]
364impl FusedIterator for EscapeDebug {}
365
366#[stable(feature = "char_escape_debug", since = "1.20.0")]
367impl fmt::Display for EscapeDebug {
368 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
369 match &self.0 {
370 EscapeDebugInner::Bytes(bytes: &EscapeIterInner<10>) => f.write_str(data:bytes.as_str()),
371 EscapeDebugInner::Char(chr: &char) => f.write_char(*chr),
372 }
373 }
374}
375
376/// Returns an iterator that yields the lowercase equivalent of a `char`.
377///
378/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
379/// its documentation for more.
380///
381/// [`to_lowercase`]: char::to_lowercase
382#[stable(feature = "rust1", since = "1.0.0")]
383#[derive(Debug, Clone)]
384pub struct ToLowercase(CaseMappingIter);
385
386#[stable(feature = "rust1", since = "1.0.0")]
387impl Iterator for ToLowercase {
388 type Item = char;
389 fn next(&mut self) -> Option<char> {
390 self.0.next()
391 }
392 fn size_hint(&self) -> (usize, Option<usize>) {
393 self.0.size_hint()
394 }
395}
396
397#[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
398impl DoubleEndedIterator for ToLowercase {
399 fn next_back(&mut self) -> Option<char> {
400 self.0.next_back()
401 }
402}
403
404#[stable(feature = "fused", since = "1.26.0")]
405impl FusedIterator for ToLowercase {}
406
407#[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
408impl ExactSizeIterator for ToLowercase {}
409
410/// Returns an iterator that yields the uppercase equivalent of a `char`.
411///
412/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
413/// its documentation for more.
414///
415/// [`to_uppercase`]: char::to_uppercase
416#[stable(feature = "rust1", since = "1.0.0")]
417#[derive(Debug, Clone)]
418pub struct ToUppercase(CaseMappingIter);
419
420#[stable(feature = "rust1", since = "1.0.0")]
421impl Iterator for ToUppercase {
422 type Item = char;
423 fn next(&mut self) -> Option<char> {
424 self.0.next()
425 }
426 fn size_hint(&self) -> (usize, Option<usize>) {
427 self.0.size_hint()
428 }
429}
430
431#[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
432impl DoubleEndedIterator for ToUppercase {
433 fn next_back(&mut self) -> Option<char> {
434 self.0.next_back()
435 }
436}
437
438#[stable(feature = "fused", since = "1.26.0")]
439impl FusedIterator for ToUppercase {}
440
441#[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
442impl ExactSizeIterator for ToUppercase {}
443
/// Iterator state holding between zero and three pending characters.
///
/// Each variant stores the characters that are still to be yielded, in
/// front-to-back order.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl CaseMappingIter {
    /// Builds the iterator from a three-slot array in which trailing `'\0'`
    /// entries mark unused slots.
    ///
    /// The first slot is always kept, so an all-`'\0'` array yields a single
    /// `'\0'`.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        if chars[2] == '\0' {
            if chars[1] == '\0' {
                CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
            } else {
                CaseMappingIter::Two(chars[0], chars[1])
            }
        } else {
            CaseMappingIter::Three(chars[0], chars[1], chars[2])
        }
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    /// Yields the front character and shrinks the state by one variant.
    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    /// Exact: both bounds equal the number of characters held by the variant.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let size = match self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(_) => 1,
            CaseMappingIter::Zero => 0,
        };
        (size, Some(size))
    }
}

impl DoubleEndedIterator for CaseMappingIter {
    /// Yields the back character and shrinks the state by one variant.
    fn next_back(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(a, b);
                Some(c)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(b);
                Some(c)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }
}

impl fmt::Display for CaseMappingIter {
    /// Writes the pending characters front to back without consuming them.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                f.write_char(a)?;
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::Two(b, c) => {
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::One(c) => f.write_char(c),
            CaseMappingIter::Zero => Ok(()),
        }
    }
}
534
535#[stable(feature = "char_struct_display", since = "1.16.0")]
536impl fmt::Display for ToLowercase {
537 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
538 fmt::Display::fmt(&self.0, f)
539 }
540}
541
542#[stable(feature = "char_struct_display", since = "1.16.0")]
543impl fmt::Display for ToUppercase {
544 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
545 fmt::Display::fmt(&self.0, f)
546 }
547}
548
549/// The error type returned when a checked char conversion fails.
550#[stable(feature = "u8_from_char", since = "1.59.0")]
551#[derive(Debug, Copy, Clone, PartialEq, Eq)]
552pub struct TryFromCharError(pub(crate) ());
553
554#[stable(feature = "u8_from_char", since = "1.59.0")]
555impl fmt::Display for TryFromCharError {
556 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
557 "unicode code point out of range".fmt(fmt)
558 }
559}
560
561#[stable(feature = "u8_from_char", since = "1.59.0")]
562impl Error for TryFromCharError {}
563