methods.rs source code [crates/core/src/char/methods.rs]

1	//! impl char {}
2
3	use crate::slice;
4	use crate::str::from_utf8_unchecked_mut;
5	use crate::unicode::printable::is_printable;
6	use crate::unicode::{self, conversions};
7
8	use super::*;
9
10	impl char {
11	/// The lowest valid code point a `char` can have, `'\0'`.
12	///
13	/// Unlike integer types, `char` actually has a gap in the middle,
14	/// meaning that the range of possible `char`s is smaller than you
15	/// might expect. Ranges of `char` will automatically hop this gap
16	/// for you:
17	///
18	/// ```
19	/// #![feature(char_min)]
20	/// let dist = u32::from(char::MAX) - u32::from(char::MIN);
21	/// let size = (char::MIN..=char::MAX).count() as u32;
22	/// assert!(size < dist);
23	/// ```
24	///
25	/// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for
26	/// all `char` values.
27	///
28	/// [`MAX`]: char::MAX
29	///
30	/// # Examples
31	///
32	/// ```
33	/// #![feature(char_min)]
34	/// # fn something_which_returns_char() -> char { 'a' }
35	/// let c: char = something_which_returns_char();
36	/// assert!(char::MIN <= c);
37	///
38	/// let value_at_min = u32::from(char::MIN);
/// assert_eq!(char::from_u32(value_at_min), Some('\0'));
40	/// ```
41	#[unstable(feature = "char_min", issue = "114298")]
// U+0000 (NUL) is the smallest Unicode scalar value, hence the lower bound of `char`.
pub const MIN: char = '\0';
43
44	/// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
45	///
46	/// Unlike integer types, `char` actually has a gap in the middle,
47	/// meaning that the range of possible `char`s is smaller than you
48	/// might expect. Ranges of `char` will automatically hop this gap
49	/// for you:
50	///
51	/// ```
52	/// #![feature(char_min)]
53	/// let dist = u32::from(char::MAX) - u32::from(char::MIN);
54	/// let size = (char::MIN..=char::MAX).count() as u32;
55	/// assert!(size < dist);
56	/// ```
57	///
58	/// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for
59	/// all `char` values.
60	///
61	/// [`MIN`]: char::MIN
62	///
63	/// # Examples
64	///
65	/// ```
66	/// # fn something_which_returns_char() -> char { 'a' }
67	/// let c: char = something_which_returns_char();
68	/// assert!(c <= char::MAX);
69	///
70	/// let value_at_max = u32::from(char::MAX);
/// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
/// assert_eq!(char::from_u32(value_at_max + 1), None);
73	/// ```
74	#[stable(feature = "assoc_char_consts", since = "1.52.0")]
// U+10FFFF is the last code point of the Unicode code space, hence the upper bound of `char`.
pub const MAX: char = '\u{10ffff}';
76
77	/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
78	/// decoding error.
79	///
80	/// It can occur, for example, when giving ill-formed UTF-8 bytes to
81	/// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
82	#[stable(feature = "assoc_char_consts", since = "1.52.0")]
// U+FFFD, the code point Unicode reserves for signalling a decoding error.
pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
84
85	/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
86	/// `char` and `str` methods are based on.
87	///
88	/// New versions of Unicode are released regularly and subsequently all methods
89	/// in the standard library depending on Unicode are updated. Therefore the
90	/// behavior of some `char` and `str` methods and the value of this constant
/// changes over time. This is *not* considered to be a breaking change.
92	///
93	/// The version numbering scheme is explained in
94	/// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
95	#[stable(feature = "assoc_char_consts", since = "1.52.0")]
96	pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
97
98	/// Creates an iterator over the UTF-16 encoded code points in `iter`,
99	/// returning unpaired surrogates as `Err`s.
100	///
101	/// # Examples
102	///
103	/// Basic usage:
104	///
105	/// ```
106	/// // 𝄞mus<invalid>ic<invalid>
/// let v = [
///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
/// ];
///
/// assert_eq!(
///     char::decode_utf16(v)
///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
///         .collect::<Vec<_>>(),
///     vec![
///         Ok('𝄞'),
///         Ok('m'), Ok('u'), Ok('s'),
///         Err(0xDD1E),
///         Ok('i'), Ok('c'),
///         Err(0xD834)
///     ]
/// );
123	/// ```
124	///
125	/// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
126	///
127	/// ```
128	/// // 𝄞mus<invalid>ic<invalid>
/// let v = [
///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
/// ];
///
/// assert_eq!(
///     char::decode_utf16(v)
///         .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
///         .collect::<String>(),
///     "𝄞mus�ic�"
/// );
139	/// ```
140	#[stable(feature = "assoc_char_funcs", since = "1.52.0")]
141	#[inline]
142	pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
143	super::decode::decode_utf16(iter)
144	}
145
146	/// Converts a `u32` to a `char`.
147	///
148	/// Note that all `char`s are valid [`u32`]s, and can be cast to one with
149	/// [`as`](../std/keyword.as.html):
150	///
151	/// ```
152	/// let c = '💯';
153	/// let i = c as u32;
154	///
155	/// assert_eq!(`128175`, i);
156	/// ```
157	///
158	/// However, the reverse is not true: not all valid [`u32`]s are valid
159	/// `char`s. `from_u32()` will return `None` if the input is not a valid value
160	/// for a `char`.
161	///
162	/// For an unsafe version of this function which ignores these checks, see
163	/// [`from_u32_unchecked`].
164	///
165	/// [`from_u32_unchecked`]: #method.from_u32_unchecked
166	///
167	/// # Examples
168	///
169	/// Basic usage:
170	///
171	/// ```
172	/// let c = char::from_u32(`0x2764`);
173	///
174	/// assert_eq!(Some('❤'), c);
175	/// ```
176	///
177	/// Returning `None` when the input is not a valid `char`:
178	///
179	/// ```
180	/// let c = char::from_u32(`0x110000`);
181	///
182	/// assert_eq!(None, c);
183	/// ```
184	#[stable(feature = "assoc_char_funcs", since = "1.52.0")]
185	#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
186	#[must_use]
187	#[inline]
188	pub const fn from_u32(i: u32) -> Option<char> {
189	super::convert::from_u32(i)
190	}
191
192	/// Converts a `u32` to a `char`, ignoring validity.
193	///
194	/// Note that all `char`s are valid [`u32`]s, and can be cast to one with
195	/// `as`:
196	///
197	/// ```
198	/// let c = '💯';
199	/// let i = c as u32;
200	///
201	/// assert_eq!(`128175`, i);
202	/// ```
203	///
204	/// However, the reverse is not true: not all valid [`u32`]s are valid
205	/// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
206	/// `char`, possibly creating an invalid one.
207	///
208	/// # Safety
209	///
210	/// This function is unsafe, as it may construct invalid `char` values.
211	///
212	/// For a safe version of this function, see the [`from_u32`] function.
213	///
214	/// [`from_u32`]: #method.from_u32
215	///
216	/// # Examples
217	///
218	/// Basic usage:
219	///
220	/// ```
221	/// let c = unsafe { char::from_u32_unchecked(`0x2764`) };
222	///
223	/// assert_eq!('❤', c);
224	/// ```
225	#[stable(feature = "assoc_char_funcs", since = "1.52.0")]
226	#[rustc_const_unstable(feature = "const_char_from_u32_unchecked", issue = "89259")]
227	#[must_use]
228	#[inline]
229	pub const unsafe fn from_u32_unchecked(i: u32) -> char {
230	// SAFETY: the safety contract must be upheld by the caller.
231	unsafe { super::convert::from_u32_unchecked(i) }
232	}
233
234	/// Converts a digit in the given radix to a `char`.
235	///
236	/// A 'radix' here is sometimes also called a 'base'. A radix of two
237	/// indicates a binary number, a radix of ten, decimal, and a radix of
238	/// sixteen, hexadecimal, to give some common values. Arbitrary
239	/// radices are supported.
240	///
241	/// `from_digit()` will return `None` if the input is not a digit in
242	/// the given radix.
243	///
244	/// # Panics
245	///
246	/// Panics if given a radix larger than 36.
247	///
248	/// # Examples
249	///
250	/// Basic usage:
251	///
252	/// ```
253	/// let c = char::from_digit(`4`, `10`);
254	///
255	/// assert_eq!(Some('4'), c);
256	///
257	/// // Decimal 11 is a single digit in base 16
258	/// let c = char::from_digit(`11`, `16`);
259	///
260	/// assert_eq!(Some('b'), c);
261	/// ```
262	///
263	/// Returning `None` when the input is not a digit:
264	///
265	/// ```
266	/// let c = char::from_digit(`20`, `10`);
267	///
268	/// assert_eq!(None, c);
269	/// ```
270	///
271	/// Passing a large radix, causing a panic:
272	///
273	/// ```should_panic
274	/// // this panics
275	/// let _c = char::from_digit(`1`, `37`);
276	/// ```
277	#[stable(feature = "assoc_char_funcs", since = "1.52.0")]
278	#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
279	#[must_use]
280	#[inline]
281	pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
282	super::convert::from_digit(num, radix)
283	}
284
285	/// Checks if a `char` is a digit in the given radix.
286	///
287	/// A 'radix' here is sometimes also called a 'base'. A radix of two
288	/// indicates a binary number, a radix of ten, decimal, and a radix of
289	/// sixteen, hexadecimal, to give some common values. Arbitrary
290	/// radices are supported.
291	///
292	/// Compared to [`is_numeric()`], this function only recognizes the characters
293	/// `0-9`, `a-z` and `A-Z`.
294	///
295	/// 'Digit' is defined to be only the following characters:
296	///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
300	///
301	/// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
302	///
303	/// [`is_numeric()`]: #method.is_numeric
304	///
305	/// # Panics
306	///
307	/// Panics if given a radix larger than 36.
308	///
309	/// # Examples
310	///
311	/// Basic usage:
312	///
313	/// ```
314	/// assert!('1'.is_digit(`10`));
315	/// assert!('f'.is_digit(`16`));
316	/// assert!(!'f'.is_digit(`10`));
317	/// ```
318	///
319	/// Passing a large radix, causing a panic:
320	///
321	/// ```should_panic
322	/// // this panics
323	/// '1'.is_digit(`37`);
324	/// ```
325	#[stable(feature = "rust1", since = "1.0.0")]
326	#[inline]
327	pub fn is_digit(self, radix: u32) -> bool {
328	self.to_digit(radix).is_some()
329	}
330
331	/// Converts a `char` to a digit in the given radix.
332	///
333	/// A 'radix' here is sometimes also called a 'base'. A radix of two
334	/// indicates a binary number, a radix of ten, decimal, and a radix of
335	/// sixteen, hexadecimal, to give some common values. Arbitrary
336	/// radices are supported.
337	///
338	/// 'Digit' is defined to be only the following characters:
339	///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
343	///
344	/// # Errors
345	///
346	/// Returns `None` if the `char` does not refer to a digit in the given radix.
347	///
348	/// # Panics
349	///
350	/// Panics if given a radix larger than 36.
351	///
352	/// # Examples
353	///
354	/// Basic usage:
355	///
356	/// ```
357	/// assert_eq!('1'.to_digit(`10`), Some(`1`));
358	/// assert_eq!('f'.to_digit(`16`), Some(`15`));
359	/// ```
360	///
361	/// Passing a non-digit results in failure:
362	///
363	/// ```
364	/// assert_eq!('f'.to_digit(`10`), None);
365	/// assert_eq!('z'.to_digit(`16`), None);
366	/// ```
367	///
368	/// Passing a large radix, causing a panic:
369	///
370	/// ```should_panic
371	/// // this panics
372	/// let _ = '1'.to_digit(`37`);
373	/// ```
374	#[stable(feature = "rust1", since = "1.0.0")]
375	#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
376	#[must_use = "this returns the result of the operation, \
377	without modifying the original"]
378	#[inline]
379	pub const fn to_digit(self, radix: u32) -> Option<u32> {
380	// If not a digit, a number greater than radix will be created.
381	let mut digit = (self as u32).wrapping_sub('0' as u32);
382	if radix > `10` {
383	assert!(radix <= `36`, "to_digit: radix is too high (maximum 36)");
384	if digit < `10` {
385	return Some(digit);
386	}
387	// Force the 6th bit to be set to ensure ascii is lower case.
388	digit = (self as u32 \| `0b10_0000`).wrapping_sub('a' as u32).saturating_add(`10`);
389	}
390	// FIXME: once then_some is const fn, use it here
391	if digit < radix { Some(digit) } else { None }
392	}
393
394	/// Returns an iterator that yields the hexadecimal Unicode escape of a
395	/// character as `char`s.
396	///
397	/// This will escape characters with the Rust syntax of the form
398	/// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
399	///
400	/// # Examples
401	///
402	/// As an iterator:
403	///
404	/// ```
405	/// for c in '❤'.escape_unicode() {
406	/// print!("{c}");
407	/// }
408	/// println!();
409	/// ```
410	///
411	/// Using `println!` directly:
412	///
413	/// ```
414	/// println!("{}", '❤'.escape_unicode());
415	/// ```
416	///
417	/// Both are equivalent to:
418	///
419	/// ```
420	/// println!("`\\`u{{2764}}");
421	/// ```
422	///
423	/// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
424	///
425	/// ```
426	/// assert_eq!('❤'.escape_unicode().to_string(), "`\\`u{2764}");
427	/// ```
428	#[must_use = "this returns the escaped char as an iterator, \
429	without modifying the original"]
430	#[stable(feature = "rust1", since = "1.0.0")]
431	#[inline]
432	pub fn escape_unicode(self) -> EscapeUnicode {
433	EscapeUnicode::new(self)
434	}
435
436	/// An extended version of `escape_debug` that optionally permits escaping
437	/// Extended Grapheme codepoints, single quotes, and double quotes. This
438	/// allows us to format characters like nonspacing marks better when they're
439	/// at the start of a string, and allows escaping single quotes in
440	/// characters, and double quotes in strings.
441	#[inline]
442	pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
443	match self {
444	'`\0`' => EscapeDebug::backslash(ascii::Char::Digit0),
445	'`\t`' => EscapeDebug::backslash(ascii::Char::SmallT),
446	'`\r`' => EscapeDebug::backslash(ascii::Char::SmallR),
447	'`\n`' => EscapeDebug::backslash(ascii::Char::SmallN),
448	'`\\`' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
449	'`\"`' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
450	'`\'`' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
451	_ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
452	EscapeDebug::from_unicode(self.escape_unicode())
453	}
454	_ if is_printable(self) => EscapeDebug::printable(self),
455	_ => EscapeDebug::from_unicode(self.escape_unicode()),
456	}
457	}
458
459	/// Returns an iterator that yields the literal escape code of a character
460	/// as `char`s.
461	///
462	/// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
463	/// of `str` or `char`.
464	///
465	/// # Examples
466	///
467	/// As an iterator:
468	///
469	/// ```
470	/// for c in '`\n`'.escape_debug() {
471	/// print!("{c}");
472	/// }
473	/// println!();
474	/// ```
475	///
476	/// Using `println!` directly:
477	///
478	/// ```
479	/// println!("{}", '`\n`'.escape_debug());
480	/// ```
481	///
482	/// Both are equivalent to:
483	///
484	/// ```
485	/// println!("`\\`n");
486	/// ```
487	///
488	/// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
489	///
490	/// ```
491	/// assert_eq!('`\n`'.escape_debug().to_string(), "`\\`n");
492	/// ```
493	#[must_use = "this returns the escaped char as an iterator, \
494	without modifying the original"]
495	#[stable(feature = "char_escape_debug", since = "1.20.0")]
496	#[inline]
497	pub fn escape_debug(self) -> EscapeDebug {
498	self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
499	}
500
501	/// Returns an iterator that yields the literal escape code of a character
502	/// as `char`s.
503	///
504	/// The default is chosen with a bias toward producing literals that are
505	/// legal in a variety of languages, including C++11 and similar C-family
506	/// languages. The exact rules are:
507	///
/// * Tab is escaped as `\t`.
/// * Carriage return is escaped as `\r`.
/// * Line feed is escaped as `\n`.
/// * Single quote is escaped as `\'`.
/// * Double quote is escaped as `\"`.
/// * Backslash is escaped as `\\`.
/// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
///   inclusive is not escaped.
/// * All other characters are given hexadecimal Unicode escapes; see
///   [`escape_unicode`].
518	///
519	/// [`escape_unicode`]: #method.escape_unicode
520	///
521	/// # Examples
522	///
523	/// As an iterator:
524	///
525	/// ```
526	/// for c in '"'.escape_default() {
527	/// print!("{c}");
528	/// }
529	/// println!();
530	/// ```
531	///
532	/// Using `println!` directly:
533	///
534	/// ```
535	/// println!("{}", '"'.escape_default());
536	/// ```
537	///
538	/// Both are equivalent to:
539	///
540	/// ```
541	/// println!("`\\\"`");
542	/// ```
543	///
544	/// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
545	///
546	/// ```
547	/// assert_eq!('"'.escape_default().to_string(), "`\\\"`");
548	/// ```
549	#[must_use = "this returns the escaped char as an iterator, \
550	without modifying the original"]
551	#[stable(feature = "rust1", since = "1.0.0")]
552	#[inline]
553	pub fn escape_default(self) -> EscapeDefault {
554	match self {
555	'`\t`' => EscapeDefault::backslash(ascii::Char::SmallT),
556	'`\r`' => EscapeDefault::backslash(ascii::Char::SmallR),
557	'`\n`' => EscapeDefault::backslash(ascii::Char::SmallN),
558	'`\\`' \| '`\'`' \| '"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
559	'`\x20`'..='`\x7e`' => EscapeDefault::printable(self.as_ascii().unwrap()),
560	_ => EscapeDefault::from_unicode(self.escape_unicode()),
561	}
562	}
563
564	/// Returns the number of bytes this `char` would need if encoded in UTF-8.
565	///
566	/// That number of bytes is always between 1 and 4, inclusive.
567	///
568	/// # Examples
569	///
570	/// Basic usage:
571	///
572	/// ```
573	/// let len = 'A'.len_utf8();
574	/// assert_eq!(len, `1`);
575	///
576	/// let len = 'ß'.len_utf8();
577	/// assert_eq!(len, `2`);
578	///
579	/// let len = 'ℝ'.len_utf8();
580	/// assert_eq!(len, `3`);
581	///
582	/// let len = '💣'.len_utf8();
583	/// assert_eq!(len, `4`);
584	/// ```
585	///
586	/// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
587	/// would take if each code point was represented as a `char` vs in the `&str` itself:
588	///
589	/// ```
590	/// // as chars
591	/// let eastern = '東';
592	/// let capital = '京';
593	///
594	/// // both can be represented as three bytes
595	/// assert_eq!(`3`, eastern.len_utf8());
596	/// assert_eq!(`3`, capital.len_utf8());
597	///
598	/// // as a &str, these two are encoded in UTF-8
599	/// let tokyo = "東京";
600	///
601	/// let len = eastern.len_utf8() + capital.len_utf8();
602	///
603	/// // we can see that they take six bytes total...
604	/// assert_eq!(`6`, tokyo.len());
605	///
606	/// // ... just like the &str
607	/// assert_eq!(len, tokyo.len());
608	/// ```
609	#[stable(feature = "rust1", since = "1.0.0")]
610	#[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
611	#[inline]
612	pub const fn len_utf8(self) -> usize {
613	len_utf8(self as u32)
614	}
615
616	/// Returns the number of 16-bit code units this `char` would need if
617	/// encoded in UTF-16.
618	///
619	/// That number of code units is always either 1 or 2, for unicode scalar values in
620	/// the [basic multilingual plane] or [supplementary planes] respectively.
621	///
622	/// See the documentation for [`len_utf8()`] for more explanation of this
623	/// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
624	///
625	/// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
626	/// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
627	/// [`len_utf8()`]: #method.len_utf8
628	///
629	/// # Examples
630	///
631	/// Basic usage:
632	///
633	/// ```
634	/// let n = 'ß'.len_utf16();
635	/// assert_eq!(n, `1`);
636	///
637	/// let len = '💣'.len_utf16();
638	/// assert_eq!(len, `2`);
639	/// ```
640	#[stable(feature = "rust1", since = "1.0.0")]
641	#[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
642	#[inline]
643	pub const fn len_utf16(self) -> usize {
644	let ch = self as u32;
645	if (ch & `0xFFFF`) == ch { `1` } else { `2` }
646	}
647
648	/// Encodes this character as UTF-8 into the provided byte buffer,
649	/// and then returns the subslice of the buffer that contains the encoded character.
650	///
651	/// # Panics
652	///
653	/// Panics if the buffer is not large enough.
654	/// A buffer of length four is large enough to encode any `char`.
655	///
656	/// # Examples
657	///
658	/// In both of these examples, 'ß' takes two bytes to encode.
659	///
660	/// ```
661	/// let mut b = [`0`; `2`];
662	///
663	/// let result = 'ß'.encode_utf8(&mut b);
664	///
665	/// assert_eq!(result, "ß");
666	///
667	/// assert_eq!(result.len(), `2`);
668	/// ```
669	///
670	/// A buffer that's too small:
671	///
672	/// ```should_panic
673	/// let mut b = [`0`; `1`];
674	///
675	/// // this panics
676	/// 'ß'.encode_utf8(&mut b);
677	/// ```
678	#[stable(feature = "unicode_encode_char", since = "1.15.0")]
679	#[inline]
680	pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
681	// SAFETY: `char` is not a surrogate, so this is valid UTF-8.
682	unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
683	}
684
685	/// Encodes this character as UTF-16 into the provided `u16` buffer,
686	/// and then returns the subslice of the buffer that contains the encoded character.
687	///
688	/// # Panics
689	///
690	/// Panics if the buffer is not large enough.
691	/// A buffer of length 2 is large enough to encode any `char`.
692	///
693	/// # Examples
694	///
695	/// In both of these examples, '𝕊' takes two `u16`s to encode.
696	///
697	/// ```
698	/// let mut b = [`0`; `2`];
699	///
700	/// let result = '𝕊'.encode_utf16(&mut b);
701	///
702	/// assert_eq!(result.len(), `2`);
703	/// ```
704	///
705	/// A buffer that's too small:
706	///
707	/// ```should_panic
708	/// let mut b = [`0`; `1`];
709	///
710	/// // this panics
711	/// '𝕊'.encode_utf16(&mut b);
712	/// ```
713	#[stable(feature = "unicode_encode_char", since = "1.15.0")]
714	#[inline]
715	pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
716	encode_utf16_raw(self as u32, dst)
717	}
718
719	/// Returns `true` if this `char` has the `Alphabetic` property.
720	///
721	/// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
722	/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
723	///
724	/// [Unicode Standard]: https://www.unicode.org/versions/latest/
725	/// [ucd]: https://www.unicode.org/reports/tr44/
726	/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
727	///
728	/// # Examples
729	///
730	/// Basic usage:
731	///
732	/// ```
733	/// assert!('a'.is_alphabetic());
734	/// assert!('京'.is_alphabetic());
735	///
736	/// let c = '💝';
737	/// // love is many things, but it is not alphabetic
738	/// assert!(!c.is_alphabetic());
739	/// ```
740	#[must_use]
741	#[stable(feature = "rust1", since = "1.0.0")]
742	#[inline]
743	pub fn is_alphabetic(self) -> bool {
744	match self {
745	'a'..='z' \| 'A'..='Z' => `true`,
746	c => c > '`\x7f`' && unicode::Alphabetic(c),
747	}
748	}
749
750	/// Returns `true` if this `char` has the `Lowercase` property.
751	///
752	/// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
753	/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
754	///
755	/// [Unicode Standard]: https://www.unicode.org/versions/latest/
756	/// [ucd]: https://www.unicode.org/reports/tr44/
757	/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
758	///
759	/// # Examples
760	///
761	/// Basic usage:
762	///
763	/// ```
764	/// assert!('a'.is_lowercase());
765	/// assert!('δ'.is_lowercase());
766	/// assert!(!'A'.is_lowercase());
767	/// assert!(!'Δ'.is_lowercase());
768	///
769	/// // The various Chinese scripts and punctuation do not have case, and so:
770	/// assert!(!'中'.is_lowercase());
771	/// assert!(!' '.is_lowercase());
772	/// ```
773	///
774	/// In a const context:
775	///
776	/// ```
777	/// #![feature(const_unicode_case_lookup)]
778	/// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
779	/// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
780	/// ```
781	#[must_use]
782	#[stable(feature = "rust1", since = "1.0.0")]
783	#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
784	#[inline]
785	pub const fn is_lowercase(self) -> bool {
786	match self {
787	'a'..='z' => `true`,
788	c => c > '`\x7f`' && unicode::Lowercase(c),
789	}
790	}
791
792	/// Returns `true` if this `char` has the `Uppercase` property.
793	///
794	/// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
795	/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
796	///
797	/// [Unicode Standard]: https://www.unicode.org/versions/latest/
798	/// [ucd]: https://www.unicode.org/reports/tr44/
799	/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
800	///
801	/// # Examples
802	///
803	/// Basic usage:
804	///
805	/// ```
806	/// assert!(!'a'.is_uppercase());
807	/// assert!(!'δ'.is_uppercase());
808	/// assert!('A'.is_uppercase());
809	/// assert!('Δ'.is_uppercase());
810	///
811	/// // The various Chinese scripts and punctuation do not have case, and so:
812	/// assert!(!'中'.is_uppercase());
813	/// assert!(!' '.is_uppercase());
814	/// ```
815	///
816	/// In a const context:
817	///
818	/// ```
819	/// #![feature(const_unicode_case_lookup)]
820	/// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
821	/// assert!(CAPITAL_DELTA_IS_UPPERCASE);
822	/// ```
823	#[must_use]
824	#[stable(feature = "rust1", since = "1.0.0")]
825	#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
826	#[inline]
827	pub const fn is_uppercase(self) -> bool {
828	match self {
829	'A'..='Z' => `true`,
830	c => c > '`\x7f`' && unicode::Uppercase(c),
831	}
832	}
833
834	/// Returns `true` if this `char` has the `White_Space` property.
835	///
836	/// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
837	///
838	/// [ucd]: https://www.unicode.org/reports/tr44/
839	/// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
840	///
841	/// # Examples
842	///
843	/// Basic usage:
844	///
845	/// ```
846	/// assert!(' '.is_whitespace());
847	///
848	/// // line break
849	/// assert!('`\n`'.is_whitespace());
850	///
851	/// // a non-breaking space
852	/// assert!('`\u{A0}`'.is_whitespace());
853	///
854	/// assert!(!'越'.is_whitespace());
855	/// ```
856	#[must_use]
857	#[stable(feature = "rust1", since = "1.0.0")]
858	#[inline]
859	pub fn is_whitespace(self) -> bool {
860	match self {
861	' ' \| '`\x09`'..='`\x0d`' => `true`,
862	c => c > '`\x7f`' && unicode::White_Space(c),
863	}
864	}
865
866	/// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
867	///
868	/// [`is_alphabetic()`]: #method.is_alphabetic
869	/// [`is_numeric()`]: #method.is_numeric
870	///
871	/// # Examples
872	///
873	/// Basic usage:
874	///
875	/// ```
876	/// assert!('٣'.is_alphanumeric());
877	/// assert!('7'.is_alphanumeric());
878	/// assert!('৬'.is_alphanumeric());
879	/// assert!('¾'.is_alphanumeric());
880	/// assert!('①'.is_alphanumeric());
881	/// assert!('K'.is_alphanumeric());
882	/// assert!('و'.is_alphanumeric());
883	/// assert!('藏'.is_alphanumeric());
884	/// ```
885	#[must_use]
886	#[stable(feature = "rust1", since = "1.0.0")]
887	#[inline]
888	pub fn is_alphanumeric(self) -> bool {
889	self.is_alphabetic() \|\| self.is_numeric()
890	}
891
892	/// Returns `true` if this `char` has the general category for control codes.
893	///
894	/// Control codes (code points with the general category of `Cc`) are described in Chapter 4
895	/// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
896	/// Database][ucd] [`UnicodeData.txt`].
897	///
898	/// [Unicode Standard]: https://www.unicode.org/versions/latest/
899	/// [ucd]: https://www.unicode.org/reports/tr44/
900	/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
901	///
902	/// # Examples
903	///
904	/// Basic usage:
905	///
906	/// ```
907	/// // U+009C, STRING TERMINATOR
/// assert!('\u{9C}'.is_control());
909	/// assert!(!'q'.is_control());
910	/// ```
911	#[must_use]
912	#[stable(feature = "rust1", since = "1.0.0")]
913	#[inline]
914	pub fn is_control(self) -> bool {
915	unicode::Cc(self)
916	}
917
918	/// Returns `true` if this `char` has the `Grapheme_Extend` property.
919	///
920	/// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
921	/// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
922	/// [`DerivedCoreProperties.txt`].
923	///
924	/// [uax29]: https://www.unicode.org/reports/tr29/
925	/// [ucd]: https://www.unicode.org/reports/tr44/
926	/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
927	#[must_use]
928	#[inline]
929	pub(crate) fn is_grapheme_extended(self) -> bool {
930	unicode::Grapheme_Extend(self)
931	}
932
933	/// Returns `true` if this `char` has one of the general categories for numbers.
934	///
935	/// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
936	/// characters, and `No` for other numeric characters) are specified in the [Unicode Character
937	/// Database][ucd] [`UnicodeData.txt`].
938	///
939	/// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
940	/// If you want everything including characters with overlapping purposes then you might want to use
941	/// a unicode or language-processing library that exposes the appropriate character properties instead
942	/// of looking at the unicode categories.
943	///
944	/// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
945	/// `is_ascii_digit` or `is_digit` instead.
946	///
947	/// [Unicode Standard]: https://www.unicode.org/versions/latest/
948	/// [ucd]: https://www.unicode.org/reports/tr44/
949	/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
950	///
951	/// # Examples
952	///
953	/// Basic usage:
954	///
955	/// ```
956	/// assert!('٣'.is_numeric());
957	/// assert!('7'.is_numeric());
958	/// assert!('৬'.is_numeric());
959	/// assert!('¾'.is_numeric());
960	/// assert!('①'.is_numeric());
961	/// assert!(!'K'.is_numeric());
962	/// assert!(!'و'.is_numeric());
963	/// assert!(!'藏'.is_numeric());
964	/// assert!(!'三'.is_numeric());
965	/// ```
966	#[must_use]
967	#[stable(feature = "rust1", since = "1.0.0")]
968	#[inline]
969	pub fn is_numeric(self) -> bool {
970	match self {
971	'0'..='9' => `true`,
972	c => c > '`\x7f`' && unicode::N(c),
973	}
974	}
975
976	/// Returns an iterator that yields the lowercase mapping of this `char` as one or more
977	/// `char`s.
978	///
979	/// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
980	///
981	/// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
982	/// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
983	///
984	/// [ucd]: https://www.unicode.org/reports/tr44/
985	/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
986	///
987	/// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
988	/// the `char`(s) given by [`SpecialCasing.txt`].
989	///
990	/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
991	///
992	/// This operation performs an unconditional mapping without tailoring. That is, the conversion
993	/// is independent of context and language.
994	///
995	/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
996	/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
997	///
998	/// [Unicode Standard]: https://www.unicode.org/versions/latest/
999	///
1000	/// # Examples
1001	///
1002	/// As an iterator:
1003	///
1004	/// ```
1005	/// for c in 'İ'.to_lowercase() {
1006	/// print!("{c}");
1007	/// }
1008	/// println!();
1009	/// ```
1010	///
1011	/// Using `println!` directly:
1012	///
1013	/// ```
1014	/// println!("{}", 'İ'.to_lowercase());
1015	/// ```
1016	///
1017	/// Both are equivalent to:
1018	///
1019	/// ```
1020	/// println!("i`\u{307}`");
1021	/// ```
1022	///
1023	/// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1024	///
1025	/// ```
1026	/// assert_eq!('C'.to_lowercase().to_string(), "c");
1027	///
1028	/// // Sometimes the result is more than one character:
1029	/// assert_eq!('İ'.to_lowercase().to_string(), "i`\u{307}`");
1030	///
1031	/// // Characters that do not have both uppercase and lowercase
1032	/// // convert into themselves.
1033	/// assert_eq!('山'.to_lowercase().to_string(), "山");
1034	/// ```
1035	#[must_use = "this returns the lowercase character as a new iterator, \
1036	without modifying the original"]
1037	#[stable(feature = "rust1", since = "1.0.0")]
1038	#[inline]
1039	pub fn to_lowercase(self) -> ToLowercase {
1040	ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1041	}
1042
1043	/// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1044	/// `char`s.
1045	///
1046	/// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1047	///
1048	/// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1049	/// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1050	///
1051	/// [ucd]: https://www.unicode.org/reports/tr44/
1052	/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1053	///
1054	/// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1055	/// the `char`(s) given by [`SpecialCasing.txt`].
1056	///
1057	/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1058	///
1059	/// This operation performs an unconditional mapping without tailoring. That is, the conversion
1060	/// is independent of context and language.
1061	///
1062	/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1063	/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1064	///
1065	/// [Unicode Standard]: https://www.unicode.org/versions/latest/
1066	///
1067	/// # Examples
1068	///
1069	/// As an iterator:
1070	///
1071	/// ```
1072	/// for c in 'ß'.to_uppercase() {
1073	/// print!("{c}");
1074	/// }
1075	/// println!();
1076	/// ```
1077	///
1078	/// Using `println!` directly:
1079	///
1080	/// ```
1081	/// println!("{}", 'ß'.to_uppercase());
1082	/// ```
1083	///
1084	/// Both are equivalent to:
1085	///
1086	/// ```
1087	/// println!("SS");
1088	/// ```
1089	///
1090	/// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1091	///
1092	/// ```
1093	/// assert_eq!('c'.to_uppercase().to_string(), "C");
1094	///
1095	/// // Sometimes the result is more than one character:
1096	/// assert_eq!('ß'.to_uppercase().to_string(), "SS");
1097	///
1098	/// // Characters that do not have both uppercase and lowercase
1099	/// // convert into themselves.
1100	/// assert_eq!('山'.to_uppercase().to_string(), "山");
1101	/// ```
1102	///
1103	/// # Note on locale
1104	///
1105	/// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
1106	///
1107	/// 'Dotless': I / ı, sometimes written ï*
1108	/// 'Dotted': İ / i*
1109	///
1110	/// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1111	///
1112	/// ```
1113	/// let upper_i = 'i'.to_uppercase().to_string();
1114	/// ```
1115	///
1116	/// The value of `upper_i` here relies on the language of the text: if we're
1117	/// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
1118	/// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1119	///
1120	/// ```
1121	/// let upper_i = 'i'.to_uppercase().to_string();
1122	///
1123	/// assert_eq!(upper_i, "I");
1124	/// ```
1125	///
1126	/// holds across languages.
1127	#[must_use = "this returns the uppercase character as a new iterator, \
1128	without modifying the original"]
1129	#[stable(feature = "rust1", since = "1.0.0")]
1130	#[inline]
1131	pub fn to_uppercase(self) -> ToUppercase {
1132	ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1133	}
1134
1135	/// Checks if the value is within the ASCII range.
1136	///
1137	/// # Examples
1138	///
1139	/// ```
1140	/// let ascii = 'a';
1141	/// let non_ascii = '❤';
1142	///
1143	/// assert!(ascii.is_ascii());
1144	/// assert!(!non_ascii.is_ascii());
1145	/// ```
1146	#[must_use]
1147	#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1148	#[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1149	#[inline]
1150	pub const fn is_ascii(&self) -> bool {
1151	self as u32* <= `0x7F`
1152	}
1153
1154	/// Returns `Some` if the value is within the ASCII range,
1155	/// or `None` if it's not.
1156	///
1157	/// This is preferred to [`Self::is_ascii`] when you're passing the value
1158	/// along to something else that can take [`ascii::Char`] rather than
1159	/// needing to check again for itself whether the value is in ASCII.
1160	#[must_use]
1161	#[unstable(feature = "ascii_char", issue = "110998")]
1162	#[inline]
1163	pub const fn as_ascii(&self) -> Option<ascii::Char> {
1164	if self.is_ascii() {
1165	// SAFETY: Just checked that this is ASCII.
1166	Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1167	} else {
1168	None
1169	}
1170	}
1171
1172	/// Makes a copy of the value in its ASCII upper case equivalent.
1173	///
1174	/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1175	/// but non-ASCII letters are unchanged.
1176	///
1177	/// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1178	///
1179	/// To uppercase ASCII characters in addition to non-ASCII characters, use
1180	/// [`to_uppercase()`].
1181	///
1182	/// # Examples
1183	///
1184	/// ```
1185	/// let ascii = 'a';
1186	/// let non_ascii = '❤';
1187	///
1188	/// assert_eq!('A', ascii.to_ascii_uppercase());
1189	/// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1190	/// ```
1191	///
1192	/// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1193	/// [`to_uppercase()`]: #method.to_uppercase
1194	#[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1195	#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1196	#[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1197	#[inline]
1198	pub const fn to_ascii_uppercase(&self) -> char {
1199	if self.is_ascii_lowercase() {
1200	(self as u8).ascii_change_case_unchecked() as char*
1201	} else {
1202	*self
1203	}
1204	}
1205
1206	/// Makes a copy of the value in its ASCII lower case equivalent.
1207	///
1208	/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1209	/// but non-ASCII letters are unchanged.
1210	///
1211	/// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1212	///
1213	/// To lowercase ASCII characters in addition to non-ASCII characters, use
1214	/// [`to_lowercase()`].
1215	///
1216	/// # Examples
1217	///
1218	/// ```
1219	/// let ascii = 'A';
1220	/// let non_ascii = '❤';
1221	///
1222	/// assert_eq!('a', ascii.to_ascii_lowercase());
1223	/// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1224	/// ```
1225	///
1226	/// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1227	/// [`to_lowercase()`]: #method.to_lowercase
1228	#[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1229	#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1230	#[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1231	#[inline]
1232	pub const fn to_ascii_lowercase(&self) -> char {
1233	if self.is_ascii_uppercase() {
1234	(self as u8).ascii_change_case_unchecked() as char*
1235	} else {
1236	*self
1237	}
1238	}
1239
1240	/// Checks that two values are an ASCII case-insensitive match.
1241	///
1242	/// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1243	///
1244	/// # Examples
1245	///
1246	/// ```
1247	/// let upper_a = 'A';
1248	/// let lower_a = 'a';
1249	/// let lower_z = 'z';
1250	///
1251	/// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1252	/// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1253	/// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1254	/// ```
1255	///
1256	/// [to_ascii_lowercase]: #method.to_ascii_lowercase
1257	#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1258	#[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1259	#[inline]
1260	pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1261	self.to_ascii_lowercase() == other.to_ascii_lowercase()
1262	}
1263
1264	/// Converts this type to its ASCII upper case equivalent in-place.
1265	///
1266	/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1267	/// but non-ASCII letters are unchanged.
1268	///
1269	/// To return a new uppercased value without modifying the existing one, use
1270	/// [`to_ascii_uppercase()`].
1271	///
1272	/// # Examples
1273	///
1274	/// ```
1275	/// let mut ascii = 'a';
1276	///
1277	/// ascii.make_ascii_uppercase();
1278	///
1279	/// assert_eq!('A', ascii);
1280	/// ```
1281	///
1282	/// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
1283	#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1284	#[inline]
1285	pub fn make_ascii_uppercase(&mut self) {
1286	*self = self.to_ascii_uppercase();
1287	}
1288
1289	/// Converts this type to its ASCII lower case equivalent in-place.
1290	///
1291	/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1292	/// but non-ASCII letters are unchanged.
1293	///
1294	/// To return a new lowercased value without modifying the existing one, use
1295	/// [`to_ascii_lowercase()`].
1296	///
1297	/// # Examples
1298	///
1299	/// ```
1300	/// let mut ascii = 'A';
1301	///
1302	/// ascii.make_ascii_lowercase();
1303	///
1304	/// assert_eq!('a', ascii);
1305	/// ```
1306	///
1307	/// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
1308	#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1309	#[inline]
1310	pub fn make_ascii_lowercase(&mut self) {
1311	*self = self.to_ascii_lowercase();
1312	}
1313
1314	/// Checks if the value is an ASCII alphabetic character:
1315	///
1316	/// - U+0041 'A' ..= U+005A 'Z', or
1317	/// - U+0061 'a' ..= U+007A 'z'.
1318	///
1319	/// # Examples
1320	///
1321	/// ```
1322	/// let uppercase_a = 'A';
1323	/// let uppercase_g = 'G';
1324	/// let a = 'a';
1325	/// let g = 'g';
1326	/// let zero = '0';
1327	/// let percent = '%';
1328	/// let space = ' ';
1329	/// let lf = '`\n`';
1330	/// let esc = '`\x1b`';
1331	///
1332	/// assert!(uppercase_a.is_ascii_alphabetic());
1333	/// assert!(uppercase_g.is_ascii_alphabetic());
1334	/// assert!(a.is_ascii_alphabetic());
1335	/// assert!(g.is_ascii_alphabetic());
1336	/// assert!(!zero.is_ascii_alphabetic());
1337	/// assert!(!percent.is_ascii_alphabetic());
1338	/// assert!(!space.is_ascii_alphabetic());
1339	/// assert!(!lf.is_ascii_alphabetic());
1340	/// assert!(!esc.is_ascii_alphabetic());
1341	/// ```
1342	#[must_use]
1343	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1344	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1345	#[inline]
1346	pub const fn is_ascii_alphabetic(&self) -> bool {
1347	matches!(*self, 'A'..='Z' \| 'a'..='z')
1348	}
1349
1350	/// Checks if the value is an ASCII uppercase character:
1351	/// U+0041 'A' ..= U+005A 'Z'.
1352	///
1353	/// # Examples
1354	///
1355	/// ```
1356	/// let uppercase_a = 'A';
1357	/// let uppercase_g = 'G';
1358	/// let a = 'a';
1359	/// let g = 'g';
1360	/// let zero = '0';
1361	/// let percent = '%';
1362	/// let space = ' ';
1363	/// let lf = '`\n`';
1364	/// let esc = '`\x1b`';
1365	///
1366	/// assert!(uppercase_a.is_ascii_uppercase());
1367	/// assert!(uppercase_g.is_ascii_uppercase());
1368	/// assert!(!a.is_ascii_uppercase());
1369	/// assert!(!g.is_ascii_uppercase());
1370	/// assert!(!zero.is_ascii_uppercase());
1371	/// assert!(!percent.is_ascii_uppercase());
1372	/// assert!(!space.is_ascii_uppercase());
1373	/// assert!(!lf.is_ascii_uppercase());
1374	/// assert!(!esc.is_ascii_uppercase());
1375	/// ```
1376	#[must_use]
1377	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1378	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1379	#[inline]
1380	pub const fn is_ascii_uppercase(&self) -> bool {
1381	matches!(*self, 'A'..='Z')
1382	}
1383
1384	/// Checks if the value is an ASCII lowercase character:
1385	/// U+0061 'a' ..= U+007A 'z'.
1386	///
1387	/// # Examples
1388	///
1389	/// ```
1390	/// let uppercase_a = 'A';
1391	/// let uppercase_g = 'G';
1392	/// let a = 'a';
1393	/// let g = 'g';
1394	/// let zero = '0';
1395	/// let percent = '%';
1396	/// let space = ' ';
1397	/// let lf = '`\n`';
1398	/// let esc = '`\x1b`';
1399	///
1400	/// assert!(!uppercase_a.is_ascii_lowercase());
1401	/// assert!(!uppercase_g.is_ascii_lowercase());
1402	/// assert!(a.is_ascii_lowercase());
1403	/// assert!(g.is_ascii_lowercase());
1404	/// assert!(!zero.is_ascii_lowercase());
1405	/// assert!(!percent.is_ascii_lowercase());
1406	/// assert!(!space.is_ascii_lowercase());
1407	/// assert!(!lf.is_ascii_lowercase());
1408	/// assert!(!esc.is_ascii_lowercase());
1409	/// ```
1410	#[must_use]
1411	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1412	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1413	#[inline]
1414	pub const fn is_ascii_lowercase(&self) -> bool {
1415	matches!(*self, 'a'..='z')
1416	}
1417
1418	/// Checks if the value is an ASCII alphanumeric character:
1419	///
1420	/// - U+0041 'A' ..= U+005A 'Z', or
1421	/// - U+0061 'a' ..= U+007A 'z', or
1422	/// - U+0030 '0' ..= U+0039 '9'.
1423	///
1424	/// # Examples
1425	///
1426	/// ```
1427	/// let uppercase_a = 'A';
1428	/// let uppercase_g = 'G';
1429	/// let a = 'a';
1430	/// let g = 'g';
1431	/// let zero = '0';
1432	/// let percent = '%';
1433	/// let space = ' ';
1434	/// let lf = '`\n`';
1435	/// let esc = '`\x1b`';
1436	///
1437	/// assert!(uppercase_a.is_ascii_alphanumeric());
1438	/// assert!(uppercase_g.is_ascii_alphanumeric());
1439	/// assert!(a.is_ascii_alphanumeric());
1440	/// assert!(g.is_ascii_alphanumeric());
1441	/// assert!(zero.is_ascii_alphanumeric());
1442	/// assert!(!percent.is_ascii_alphanumeric());
1443	/// assert!(!space.is_ascii_alphanumeric());
1444	/// assert!(!lf.is_ascii_alphanumeric());
1445	/// assert!(!esc.is_ascii_alphanumeric());
1446	/// ```
1447	#[must_use]
1448	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1449	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1450	#[inline]
1451	pub const fn is_ascii_alphanumeric(&self) -> bool {
1452	matches!(self, '0'..='9') \| matches!(self, 'A'..='Z') \| matches!(*self, 'a'..='z')
1453	}
1454
1455	/// Checks if the value is an ASCII decimal digit:
1456	/// U+0030 '0' ..= U+0039 '9'.
1457	///
1458	/// # Examples
1459	///
1460	/// ```
1461	/// let uppercase_a = 'A';
1462	/// let uppercase_g = 'G';
1463	/// let a = 'a';
1464	/// let g = 'g';
1465	/// let zero = '0';
1466	/// let percent = '%';
1467	/// let space = ' ';
1468	/// let lf = '`\n`';
1469	/// let esc = '`\x1b`';
1470	///
1471	/// assert!(!uppercase_a.is_ascii_digit());
1472	/// assert!(!uppercase_g.is_ascii_digit());
1473	/// assert!(!a.is_ascii_digit());
1474	/// assert!(!g.is_ascii_digit());
1475	/// assert!(zero.is_ascii_digit());
1476	/// assert!(!percent.is_ascii_digit());
1477	/// assert!(!space.is_ascii_digit());
1478	/// assert!(!lf.is_ascii_digit());
1479	/// assert!(!esc.is_ascii_digit());
1480	/// ```
1481	#[must_use]
1482	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1483	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1484	#[inline]
1485	pub const fn is_ascii_digit(&self) -> bool {
1486	matches!(*self, '0'..='9')
1487	}
1488
1489	/// Checks if the value is an ASCII octal digit:
1490	/// U+0030 '0' ..= U+0037 '7'.
1491	///
1492	/// # Examples
1493	///
1494	/// ```
1495	/// #![feature(is_ascii_octdigit)]
1496	///
1497	/// let uppercase_a = 'A';
1498	/// let a = 'a';
1499	/// let zero = '0';
1500	/// let seven = '7';
1501	/// let nine = '9';
1502	/// let percent = '%';
1503	/// let lf = '`\n`';
1504	///
1505	/// assert!(!uppercase_a.is_ascii_octdigit());
1506	/// assert!(!a.is_ascii_octdigit());
1507	/// assert!(zero.is_ascii_octdigit());
1508	/// assert!(seven.is_ascii_octdigit());
1509	/// assert!(!nine.is_ascii_octdigit());
1510	/// assert!(!percent.is_ascii_octdigit());
1511	/// assert!(!lf.is_ascii_octdigit());
1512	/// ```
1513	#[must_use]
1514	#[unstable(feature = "is_ascii_octdigit", issue = "101288")]
1515	#[rustc_const_unstable(feature = "is_ascii_octdigit", issue = "101288")]
1516	#[inline]
1517	pub const fn is_ascii_octdigit(&self) -> bool {
1518	matches!(*self, '0'..='7')
1519	}
1520
1521	/// Checks if the value is an ASCII hexadecimal digit:
1522	///
1523	/// - U+0030 '0' ..= U+0039 '9', or
1524	/// - U+0041 'A' ..= U+0046 'F', or
1525	/// - U+0061 'a' ..= U+0066 'f'.
1526	///
1527	/// # Examples
1528	///
1529	/// ```
1530	/// let uppercase_a = 'A';
1531	/// let uppercase_g = 'G';
1532	/// let a = 'a';
1533	/// let g = 'g';
1534	/// let zero = '0';
1535	/// let percent = '%';
1536	/// let space = ' ';
1537	/// let lf = '`\n`';
1538	/// let esc = '`\x1b`';
1539	///
1540	/// assert!(uppercase_a.is_ascii_hexdigit());
1541	/// assert!(!uppercase_g.is_ascii_hexdigit());
1542	/// assert!(a.is_ascii_hexdigit());
1543	/// assert!(!g.is_ascii_hexdigit());
1544	/// assert!(zero.is_ascii_hexdigit());
1545	/// assert!(!percent.is_ascii_hexdigit());
1546	/// assert!(!space.is_ascii_hexdigit());
1547	/// assert!(!lf.is_ascii_hexdigit());
1548	/// assert!(!esc.is_ascii_hexdigit());
1549	/// ```
1550	#[must_use]
1551	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1552	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1553	#[inline]
1554	pub const fn is_ascii_hexdigit(&self) -> bool {
1555	matches!(self, '0'..='9') \| matches!(self, 'A'..='F') \| matches!(*self, 'a'..='f')
1556	}
1557
1558	/// Checks if the value is an ASCII punctuation character:
1559	///
1560	/// - U+0021 ..= U+002F `! " # $ % & ' ( ) + , - . /`, or*
1561	/// - U+003A ..= U+0040 `: ; < = > ? @`, or
1562	/// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
1563	/// - U+007B ..= U+007E `{ \| } ~`
1564	///
1565	/// # Examples
1566	///
1567	/// ```
1568	/// let uppercase_a = 'A';
1569	/// let uppercase_g = 'G';
1570	/// let a = 'a';
1571	/// let g = 'g';
1572	/// let zero = '0';
1573	/// let percent = '%';
1574	/// let space = ' ';
1575	/// let lf = '`\n`';
1576	/// let esc = '`\x1b`';
1577	///
1578	/// assert!(!uppercase_a.is_ascii_punctuation());
1579	/// assert!(!uppercase_g.is_ascii_punctuation());
1580	/// assert!(!a.is_ascii_punctuation());
1581	/// assert!(!g.is_ascii_punctuation());
1582	/// assert!(!zero.is_ascii_punctuation());
1583	/// assert!(percent.is_ascii_punctuation());
1584	/// assert!(!space.is_ascii_punctuation());
1585	/// assert!(!lf.is_ascii_punctuation());
1586	/// assert!(!esc.is_ascii_punctuation());
1587	/// ```
1588	#[must_use]
1589	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1590	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1591	#[inline]
1592	pub const fn is_ascii_punctuation(&self) -> bool {
1593	matches!(*self, '!'..='/')
1594	\| matches!(*self, ':'..='@')
1595	\| matches!(*self, '['..='`')
1596	\| matches!(*self, '{'..='~')
1597	}
1598
1599	/// Checks if the value is an ASCII graphic character:
1600	/// U+0021 '!' ..= U+007E '~'.
1601	///
1602	/// # Examples
1603	///
1604	/// ```
1605	/// let uppercase_a = 'A';
1606	/// let uppercase_g = 'G';
1607	/// let a = 'a';
1608	/// let g = 'g';
1609	/// let zero = '0';
1610	/// let percent = '%';
1611	/// let space = ' ';
1612	/// let lf = '`\n`';
1613	/// let esc = '`\x1b`';
1614	///
1615	/// assert!(uppercase_a.is_ascii_graphic());
1616	/// assert!(uppercase_g.is_ascii_graphic());
1617	/// assert!(a.is_ascii_graphic());
1618	/// assert!(g.is_ascii_graphic());
1619	/// assert!(zero.is_ascii_graphic());
1620	/// assert!(percent.is_ascii_graphic());
1621	/// assert!(!space.is_ascii_graphic());
1622	/// assert!(!lf.is_ascii_graphic());
1623	/// assert!(!esc.is_ascii_graphic());
1624	/// ```
1625	#[must_use]
1626	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1627	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1628	#[inline]
1629	pub const fn is_ascii_graphic(&self) -> bool {
1630	matches!(*self, '!'..='~')
1631	}
1632
1633	/// Checks if the value is an ASCII whitespace character:
1634	/// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1635	/// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1636	///
1637	/// Rust uses the WhatWG Infra Standard's [definition of ASCII
1638	/// whitespace][infra-aw]. There are several other definitions in
1639	/// wide use. For instance, [the POSIX locale][pct] includes
1640	/// U+000B VERTICAL TAB as well as all the above characters,
1641	/// but—from the very same specification—[the default rule for
1642	/// "field splitting" in the Bourne shell][bfs] considers only
1643	/// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1644	///
1645	/// If you are writing a program that will process an existing
1646	/// file format, check what that format's definition of whitespace is
1647	/// before using this function.
1648	///
1649	/// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1650	/// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1651	/// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1652	///
1653	/// # Examples
1654	///
1655	/// ```
1656	/// let uppercase_a = 'A';
1657	/// let uppercase_g = 'G';
1658	/// let a = 'a';
1659	/// let g = 'g';
1660	/// let zero = '0';
1661	/// let percent = '%';
1662	/// let space = ' ';
1663	/// let lf = '`\n`';
1664	/// let esc = '`\x1b`';
1665	///
1666	/// assert!(!uppercase_a.is_ascii_whitespace());
1667	/// assert!(!uppercase_g.is_ascii_whitespace());
1668	/// assert!(!a.is_ascii_whitespace());
1669	/// assert!(!g.is_ascii_whitespace());
1670	/// assert!(!zero.is_ascii_whitespace());
1671	/// assert!(!percent.is_ascii_whitespace());
1672	/// assert!(space.is_ascii_whitespace());
1673	/// assert!(lf.is_ascii_whitespace());
1674	/// assert!(!esc.is_ascii_whitespace());
1675	/// ```
1676	#[must_use]
1677	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1678	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1679	#[inline]
1680	pub const fn is_ascii_whitespace(&self) -> bool {
1681	matches!(*self, '`\t`' \| '`\n`' \| '`\x0C`' \| '`\r`' \| ' ')
1682	}
1683
1684	/// Checks if the value is an ASCII control character:
1685	/// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
1686	/// Note that most ASCII whitespace characters are control
1687	/// characters, but SPACE is not.
1688	///
1689	/// # Examples
1690	///
1691	/// ```
1692	/// let uppercase_a = 'A';
1693	/// let uppercase_g = 'G';
1694	/// let a = 'a';
1695	/// let g = 'g';
1696	/// let zero = '0';
1697	/// let percent = '%';
1698	/// let space = ' ';
1699	/// let lf = '`\n`';
1700	/// let esc = '`\x1b`';
1701	///
1702	/// assert!(!uppercase_a.is_ascii_control());
1703	/// assert!(!uppercase_g.is_ascii_control());
1704	/// assert!(!a.is_ascii_control());
1705	/// assert!(!g.is_ascii_control());
1706	/// assert!(!zero.is_ascii_control());
1707	/// assert!(!percent.is_ascii_control());
1708	/// assert!(!space.is_ascii_control());
1709	/// assert!(lf.is_ascii_control());
1710	/// assert!(esc.is_ascii_control());
1711	/// ```
1712	#[must_use]
1713	#[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1714	#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1715	#[inline]
1716	pub const fn is_ascii_control(&self) -> bool {
1717	matches!(*self, '`\0`'..='`\x1F`' \| '`\x7F`')
1718	}
1719	}
1720
/// Set of boolean switches selecting which optional kinds of escaping are applied.
///
/// NOTE(review): presumably consumed by the extended debug-escaping routine of
/// `char`; the consumer is not visible in this part of the file — confirm.
pub(crate) struct EscapeDebugExtArgs {
    /// Escape Extended Grapheme codepoints?
    pub(crate) escape_grapheme_extended: bool,

    /// Escape single quotes?
    pub(crate) escape_single_quote: bool,

    /// Escape double quotes?
    pub(crate) escape_double_quote: bool,
}
1731
1732	impl EscapeDebugExtArgs {
1733	pub(crate) const ESCAPE_ALL: Self = Self {
1734	escape_grapheme_extended: `true`,
1735	escape_single_quote: `true`,
1736	escape_double_quote: `true`,
1737	};
1738	}
1739
1740	#[inline]
1741	const fn len_utf8(code: u32) -> usize {
1742	if code < MAX_ONE_B {
1743	`1`
1744	} else if code < MAX_TWO_B {
1745	`2`
1746	} else if code < MAX_THREE_B {
1747	`3`
1748	} else {
1749	`4`
1750	}
1751	}
1752
1753	/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1754	/// and then returns the subslice of the buffer that contains the encoded character.
1755	///
1756	/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1757	/// (Creating a `char` in the surrogate range is UB.)
1758	/// The result is valid [generalized UTF-8] but not valid UTF-8.
1759	///
1760	/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1761	///
1762	/// # Panics
1763	///
1764	/// Panics if the buffer is not large enough.
1765	/// A buffer of length four is large enough to encode any `char`.
1766	#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1767	#[doc(hidden)]
1768	#[inline]
1769	pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1770	let len = len_utf8(code);
1771	match (len, &mut dst[..]) {
1772	(`1`, [a, ..]) => {
1773	*a = code as u8;
1774	}
1775	(`2`, [a, b, ..]) => {
1776	*a = (code >> `6` & `0x1F`) as u8 \| TAG_TWO_B;
1777	*b = (code & `0x3F`) as u8 \| TAG_CONT;
1778	}
1779	(`3`, [a, b, c, ..]) => {
1780	*a = (code >> `12` & `0x0F`) as u8 \| TAG_THREE_B;
1781	*b = (code >> `6` & `0x3F`) as u8 \| TAG_CONT;
1782	*c = (code & `0x3F`) as u8 \| TAG_CONT;
1783	}
1784	(`4`, [a, b, c, d, ..]) => {
1785	*a = (code >> `18` & `0x07`) as u8 \| TAG_FOUR_B;
1786	*b = (code >> `12` & `0x3F`) as u8 \| TAG_CONT;
1787	*c = (code >> `6` & `0x3F`) as u8 \| TAG_CONT;
1788	*d = (code & `0x3F`) as u8 \| TAG_CONT;
1789	}
1790	_ => panic!(
1791	"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
1792	len,
1793	code,
1794	dst.len(),
1795	),
1796	};
1797	&mut dst[..len]
1798	}
1799
1800	/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
1801	/// and then returns the subslice of the buffer that contains the encoded character.
1802	///
1803	/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
1804	/// (Creating a `char` in the surrogate range is UB.)
1805	///
1806	/// # Panics
1807	///
1808	/// Panics if the buffer is not large enough.
1809	/// A buffer of length 2 is large enough to encode any `char`.
1810	#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1811	#[doc(hidden)]
1812	#[inline]
1813	pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
1814	// SAFETY: each arm checks whether there are enough bits to write into
1815	unsafe {
1816	if (code & `0xFFFF`) == code && !dst.is_empty() {
1817	// The BMP falls through
1818	dst.get_unchecked_mut(index:`0`) = code as u16*;
1819	slice::from_raw_parts_mut(data:dst.as_mut_ptr(), len:`1`)
1820	} else if dst.len() >= `2` {
1821	// Supplementary planes break into surrogates.
1822	code -= `0x1_0000`;
1823	dst.get_unchecked_mut(index:`0`) = `0xD800` \| ((code >> `10`) as u16*);
1824	dst.get_unchecked_mut(index:`1`) = `0xDC00` \| ((code as u16*) & `0x3FF`);
1825	slice::from_raw_parts_mut(data:dst.as_mut_ptr(), len:`2`)
1826	} else {
1827	panic!(
1828	"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
1829	char::from_u32_unchecked(code).len_utf16(),
1830	code,
1831	dst.len(),
1832	)
1833	}
1834	}
1835	}
1836

Provided by KDAB

Definitions