1 | //! impl char {} |
2 | |
3 | use super::*; |
4 | use crate::panic::const_panic; |
5 | use crate::slice; |
6 | use crate::str::from_utf8_unchecked_mut; |
7 | use crate::unicode::printable::is_printable; |
8 | use crate::unicode::{self, conversions}; |
9 | |
10 | impl char { |
11 | /// The lowest valid code point a `char` can have, `'\0'`. |
12 | /// |
13 | /// Unlike integer types, `char` actually has a gap in the middle, |
14 | /// meaning that the range of possible `char`s is smaller than you |
15 | /// might expect. Ranges of `char` will automatically hop this gap |
16 | /// for you: |
17 | /// |
18 | /// ``` |
19 | /// let dist = u32::from(char::MAX) - u32::from(char::MIN); |
20 | /// let size = (char::MIN..=char::MAX).count() as u32; |
21 | /// assert!(size < dist); |
22 | /// ``` |
23 | /// |
24 | /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for |
25 | /// all `char` values. |
26 | /// |
27 | /// [`MAX`]: char::MAX |
28 | /// |
29 | /// # Examples |
30 | /// |
31 | /// ``` |
32 | /// # fn something_which_returns_char() -> char { 'a' } |
33 | /// let c: char = something_which_returns_char(); |
34 | /// assert!(char::MIN <= c); |
35 | /// |
36 | /// let value_at_min = u32::from(char::MIN); |
/// assert_eq!(char::from_u32(value_at_min), Some('\0'));
38 | /// ``` |
39 | #[stable (feature = "char_min" , since = "1.83.0" )] |
// U+0000 (NUL). A char literal must contain exactly one scalar value, so the
// escape is written with no padding inside the quotes.
pub const MIN: char = '\0';
41 | |
42 | /// The highest valid code point a `char` can have, `'\u{10FFFF}'`. |
43 | /// |
44 | /// Unlike integer types, `char` actually has a gap in the middle, |
45 | /// meaning that the range of possible `char`s is smaller than you |
46 | /// might expect. Ranges of `char` will automatically hop this gap |
47 | /// for you: |
48 | /// |
49 | /// ``` |
50 | /// let dist = u32::from(char::MAX) - u32::from(char::MIN); |
51 | /// let size = (char::MIN..=char::MAX).count() as u32; |
52 | /// assert!(size < dist); |
53 | /// ``` |
54 | /// |
55 | /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for |
56 | /// all `char` values. |
57 | /// |
58 | /// [`MIN`]: char::MIN |
59 | /// |
60 | /// # Examples |
61 | /// |
62 | /// ``` |
63 | /// # fn something_which_returns_char() -> char { 'a' } |
64 | /// let c: char = something_which_returns_char(); |
65 | /// assert!(c <= char::MAX); |
66 | /// |
67 | /// let value_at_max = u32::from(char::MAX); |
/// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
69 | /// assert_eq!(char::from_u32(value_at_max + 1), None); |
70 | /// ``` |
71 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
// U+10FFFF. A char literal must contain exactly one scalar value, so the
// escape is written with no padding inside the quotes.
pub const MAX: char = '\u{10FFFF}';
73 | |
74 | /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to |
75 | /// UTF-8 encoding. |
76 | #[unstable (feature = "char_max_len" , issue = "121714" )] |
// Matches the upper bound documented on `len_utf8` ("between 1 and 4, inclusive")
// and the buffer length `encode_utf8` documents as sufficient for any `char`.
pub const MAX_LEN_UTF8: usize = 4;
78 | |
79 | /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char` |
80 | /// to UTF-16 encoding. |
81 | #[unstable (feature = "char_max_len" , issue = "121714" )] |
// Matches the upper bound documented on `len_utf16` ("either 1 or 2") and the
// buffer length `encode_utf16` documents as sufficient for any `char`.
pub const MAX_LEN_UTF16: usize = 2;
83 | |
84 | /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a |
85 | /// decoding error. |
86 | /// |
87 | /// It can occur, for example, when giving ill-formed UTF-8 bytes to |
88 | /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy). |
89 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
// U+FFFD. A char literal must contain exactly one scalar value, so the
// escape is written with no padding inside the quotes.
pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
91 | |
92 | /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of |
93 | /// `char` and `str` methods are based on. |
94 | /// |
95 | /// New versions of Unicode are released regularly and subsequently all methods |
96 | /// in the standard library depending on Unicode are updated. Therefore the |
97 | /// behavior of some `char` and `str` methods and the value of this constant |
98 | /// changes over time. This is *not* considered to be a breaking change. |
99 | /// |
100 | /// The version numbering scheme is explained in |
101 | /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4). |
102 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
// Forwarded from `crate::unicode::UNICODE_VERSION` rather than hard-coded here;
// the tuple is (major, minor, update) — presumably, per the version-numbering
// scheme linked in the docs above; confirm against the `unicode` module.
pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
104 | |
105 | /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`, |
106 | /// returning unpaired surrogates as `Err`s. |
107 | /// |
108 | /// # Examples |
109 | /// |
110 | /// Basic usage: |
111 | /// |
112 | /// ``` |
113 | /// // 𝄞mus<invalid>ic<invalid> |
114 | /// let v = [ |
115 | /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, |
116 | /// ]; |
117 | /// |
118 | /// assert_eq!( |
119 | /// char::decode_utf16(v) |
120 | /// .map(|r| r.map_err(|e| e.unpaired_surrogate())) |
121 | /// .collect::<Vec<_>>(), |
122 | /// vec![ |
123 | /// Ok('𝄞' ), |
124 | /// Ok('m' ), Ok('u' ), Ok('s' ), |
125 | /// Err(0xDD1E), |
126 | /// Ok('i' ), Ok('c' ), |
127 | /// Err(0xD834) |
128 | /// ] |
129 | /// ); |
130 | /// ``` |
131 | /// |
132 | /// A lossy decoder can be obtained by replacing `Err` results with the replacement character: |
133 | /// |
134 | /// ``` |
135 | /// // 𝄞mus<invalid>ic<invalid> |
136 | /// let v = [ |
137 | /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, |
138 | /// ]; |
139 | /// |
140 | /// assert_eq!( |
141 | /// char::decode_utf16(v) |
142 | /// .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER)) |
143 | /// .collect::<String>(), |
144 | /// "𝄞mus�ic�" |
145 | /// ); |
146 | /// ``` |
147 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
148 | #[inline ] |
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
    // Associated-function entry point only: `iter` is handed unchanged to the
    // free function `decode_utf16` in the parent's `decode` module, which
    // builds the returned `DecodeUtf16` adapter.
    super::decode::decode_utf16(iter)
}
152 | |
153 | /// Converts a `u32` to a `char`. |
154 | /// |
155 | /// Note that all `char`s are valid [`u32`]s, and can be cast to one with |
156 | /// [`as`](../std/keyword.as.html): |
157 | /// |
158 | /// ``` |
159 | /// let c = '💯' ; |
160 | /// let i = c as u32; |
161 | /// |
162 | /// assert_eq!(128175, i); |
163 | /// ``` |
164 | /// |
165 | /// However, the reverse is not true: not all valid [`u32`]s are valid |
166 | /// `char`s. `from_u32()` will return `None` if the input is not a valid value |
167 | /// for a `char`. |
168 | /// |
169 | /// For an unsafe version of this function which ignores these checks, see |
170 | /// [`from_u32_unchecked`]. |
171 | /// |
172 | /// [`from_u32_unchecked`]: #method.from_u32_unchecked |
173 | /// |
174 | /// # Examples |
175 | /// |
176 | /// Basic usage: |
177 | /// |
178 | /// ``` |
179 | /// let c = char::from_u32(0x2764); |
180 | /// |
181 | /// assert_eq!(Some('❤' ), c); |
182 | /// ``` |
183 | /// |
184 | /// Returning `None` when the input is not a valid `char`: |
185 | /// |
186 | /// ``` |
187 | /// let c = char::from_u32(0x110000); |
188 | /// |
189 | /// assert_eq!(None, c); |
190 | /// ``` |
191 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
192 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
193 | #[must_use ] |
194 | #[inline ] |
pub const fn from_u32(i: u32) -> Option<char> {
    // No checks are performed here; the validity test that produces `None`
    // lives in `super::convert::from_u32`, to which this forwards `i` as-is.
    super::convert::from_u32(i)
}
198 | |
199 | /// Converts a `u32` to a `char`, ignoring validity. |
200 | /// |
201 | /// Note that all `char`s are valid [`u32`]s, and can be cast to one with |
202 | /// `as`: |
203 | /// |
204 | /// ``` |
205 | /// let c = '💯' ; |
206 | /// let i = c as u32; |
207 | /// |
208 | /// assert_eq!(128175, i); |
209 | /// ``` |
210 | /// |
211 | /// However, the reverse is not true: not all valid [`u32`]s are valid |
212 | /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to |
213 | /// `char`, possibly creating an invalid one. |
214 | /// |
215 | /// # Safety |
216 | /// |
217 | /// This function is unsafe, as it may construct invalid `char` values. |
218 | /// |
219 | /// For a safe version of this function, see the [`from_u32`] function. |
220 | /// |
221 | /// [`from_u32`]: #method.from_u32 |
222 | /// |
223 | /// # Examples |
224 | /// |
225 | /// Basic usage: |
226 | /// |
227 | /// ``` |
228 | /// let c = unsafe { char::from_u32_unchecked(0x2764) }; |
229 | /// |
230 | /// assert_eq!('❤' , c); |
231 | /// ``` |
232 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
233 | #[rustc_const_stable (feature = "const_char_from_u32_unchecked" , since = "1.81.0" )] |
234 | #[must_use ] |
235 | #[inline ] |
pub const unsafe fn from_u32_unchecked(i: u32) -> char {
    // This wrapper adds no validation of its own; `i` is forwarded as-is to
    // `super::convert::from_u32_unchecked`.
    // SAFETY: the safety contract must be upheld by the caller.
    unsafe { super::convert::from_u32_unchecked(i) }
}
240 | |
241 | /// Converts a digit in the given radix to a `char`. |
242 | /// |
243 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
244 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
245 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
246 | /// radices are supported. |
247 | /// |
248 | /// `from_digit()` will return `None` if the input is not a digit in |
249 | /// the given radix. |
250 | /// |
251 | /// # Panics |
252 | /// |
253 | /// Panics if given a radix larger than 36. |
254 | /// |
255 | /// # Examples |
256 | /// |
257 | /// Basic usage: |
258 | /// |
259 | /// ``` |
260 | /// let c = char::from_digit(4, 10); |
261 | /// |
262 | /// assert_eq!(Some('4' ), c); |
263 | /// |
264 | /// // Decimal 11 is a single digit in base 16 |
265 | /// let c = char::from_digit(11, 16); |
266 | /// |
267 | /// assert_eq!(Some('b' ), c); |
268 | /// ``` |
269 | /// |
270 | /// Returning `None` when the input is not a digit: |
271 | /// |
272 | /// ``` |
273 | /// let c = char::from_digit(20, 10); |
274 | /// |
275 | /// assert_eq!(None, c); |
276 | /// ``` |
277 | /// |
278 | /// Passing a large radix, causing a panic: |
279 | /// |
280 | /// ```should_panic |
281 | /// // this panics |
282 | /// let _c = char::from_digit(1, 37); |
283 | /// ``` |
284 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
285 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
286 | #[must_use ] |
287 | #[inline ] |
pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
    // Pure forwarder: both arguments go unchanged to `super::convert::from_digit`.
    // NOTE(review): the documented panic for `radix > 36` is presumably raised
    // inside that function — nothing in this body checks the radix.
    super::convert::from_digit(num, radix)
}
291 | |
292 | /// Checks if a `char` is a digit in the given radix. |
293 | /// |
294 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
295 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
296 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
297 | /// radices are supported. |
298 | /// |
299 | /// Compared to [`is_numeric()`], this function only recognizes the characters |
300 | /// `0-9`, `a-z` and `A-Z`. |
301 | /// |
302 | /// 'Digit' is defined to be only the following characters: |
303 | /// |
304 | /// * `0-9` |
305 | /// * `a-z` |
306 | /// * `A-Z` |
307 | /// |
308 | /// For a more comprehensive understanding of 'digit', see [`is_numeric()`]. |
309 | /// |
310 | /// [`is_numeric()`]: #method.is_numeric |
311 | /// |
312 | /// # Panics |
313 | /// |
314 | /// Panics if given a radix smaller than 2 or larger than 36. |
315 | /// |
316 | /// # Examples |
317 | /// |
318 | /// Basic usage: |
319 | /// |
320 | /// ``` |
321 | /// assert!('1' .is_digit(10)); |
322 | /// assert!('f' .is_digit(16)); |
323 | /// assert!(!'f' .is_digit(10)); |
324 | /// ``` |
325 | /// |
326 | /// Passing a large radix, causing a panic: |
327 | /// |
328 | /// ```should_panic |
329 | /// // this panics |
330 | /// '1' .is_digit(37); |
331 | /// ``` |
332 | /// |
333 | /// Passing a small radix, causing a panic: |
334 | /// |
335 | /// ```should_panic |
336 | /// // this panics |
337 | /// '1' .is_digit(1); |
338 | /// ``` |
339 | #[stable (feature = "rust1" , since = "1.0.0" )] |
340 | #[rustc_const_stable (feature = "const_char_classify" , since = "1.87.0" )] |
341 | #[inline ] |
342 | pub const fn is_digit(self, radix: u32) -> bool { |
343 | self.to_digit(radix).is_some() |
344 | } |
345 | |
346 | /// Converts a `char` to a digit in the given radix. |
347 | /// |
348 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
349 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
350 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
351 | /// radices are supported. |
352 | /// |
353 | /// 'Digit' is defined to be only the following characters: |
354 | /// |
355 | /// * `0-9` |
356 | /// * `a-z` |
357 | /// * `A-Z` |
358 | /// |
359 | /// # Errors |
360 | /// |
361 | /// Returns `None` if the `char` does not refer to a digit in the given radix. |
362 | /// |
363 | /// # Panics |
364 | /// |
365 | /// Panics if given a radix smaller than 2 or larger than 36. |
366 | /// |
367 | /// # Examples |
368 | /// |
369 | /// Basic usage: |
370 | /// |
371 | /// ``` |
372 | /// assert_eq!('1' .to_digit(10), Some(1)); |
373 | /// assert_eq!('f' .to_digit(16), Some(15)); |
374 | /// ``` |
375 | /// |
376 | /// Passing a non-digit results in failure: |
377 | /// |
378 | /// ``` |
379 | /// assert_eq!('f' .to_digit(10), None); |
380 | /// assert_eq!('z' .to_digit(16), None); |
381 | /// ``` |
382 | /// |
383 | /// Passing a large radix, causing a panic: |
384 | /// |
385 | /// ```should_panic |
386 | /// // this panics |
387 | /// let _ = '1' .to_digit(37); |
388 | /// ``` |
389 | /// Passing a small radix, causing a panic: |
390 | /// |
391 | /// ```should_panic |
392 | /// // this panics |
393 | /// let _ = '1' .to_digit(1); |
394 | /// ``` |
395 | #[stable (feature = "rust1" , since = "1.0.0" )] |
396 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
397 | #[must_use = "this returns the result of the operation, \ |
398 | without modifying the original" ] |
399 | #[inline ] |
400 | pub const fn to_digit(self, radix: u32) -> Option<u32> { |
401 | assert!( |
402 | radix >= 2 && radix <= 36, |
403 | "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive" |
404 | ); |
405 | // check radix to remove letter handling code when radix is a known constant |
406 | let value = if self > '9' && radix > 10 { |
407 | // mask to convert ASCII letters to uppercase |
408 | const TO_UPPERCASE_MASK: u32 = !0b0010_0000; |
409 | // Converts an ASCII letter to its corresponding integer value: |
410 | // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36. |
411 | // |
412 | // Add Overflow Safety: |
413 | // By applying the mask after the subtraction, the first addendum is |
414 | // constrained such that it never exceeds u32::MAX - 0x20. |
415 | ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10 |
416 | } else { |
417 | // convert digit to value, non-digits wrap to values > 36 |
418 | (self as u32).wrapping_sub('0' as u32) |
419 | }; |
420 | // FIXME(const-hack): once then_some is const fn, use it here |
421 | if value < radix { Some(value) } else { None } |
422 | } |
423 | |
424 | /// Returns an iterator that yields the hexadecimal Unicode escape of a |
425 | /// character as `char`s. |
426 | /// |
427 | /// This will escape characters with the Rust syntax of the form |
428 | /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation. |
429 | /// |
430 | /// # Examples |
431 | /// |
432 | /// As an iterator: |
433 | /// |
434 | /// ``` |
435 | /// for c in '❤' .escape_unicode() { |
436 | /// print!("{c}" ); |
437 | /// } |
438 | /// println!(); |
439 | /// ``` |
440 | /// |
441 | /// Using `println!` directly: |
442 | /// |
443 | /// ``` |
444 | /// println!("{}" , '❤' .escape_unicode()); |
445 | /// ``` |
446 | /// |
447 | /// Both are equivalent to: |
448 | /// |
449 | /// ``` |
/// println!("\\u{{2764}}");
451 | /// ``` |
452 | /// |
453 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
454 | /// |
455 | /// ``` |
/// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
457 | /// ``` |
458 | #[must_use = "this returns the escaped char as an iterator, \ |
459 | without modifying the original" ] |
460 | #[stable (feature = "rust1" , since = "1.0.0" )] |
461 | #[inline ] |
pub fn escape_unicode(self) -> EscapeUnicode {
    // All formatting work happens in the `EscapeUnicode` iterator type; this
    // method only constructs it from the character.
    EscapeUnicode::new(self)
}
465 | |
466 | /// An extended version of `escape_debug` that optionally permits escaping |
467 | /// Extended Grapheme codepoints, single quotes, and double quotes. This |
468 | /// allows us to format characters like nonspacing marks better when they're |
469 | /// at the start of a string, and allows escaping single quotes in |
470 | /// characters, and double quotes in strings. |
471 | #[inline ] |
472 | pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug { |
473 | match self { |
474 | ' \0' => EscapeDebug::backslash(ascii::Char::Digit0), |
475 | ' \t' => EscapeDebug::backslash(ascii::Char::SmallT), |
476 | ' \r' => EscapeDebug::backslash(ascii::Char::SmallR), |
477 | ' \n' => EscapeDebug::backslash(ascii::Char::SmallN), |
478 | ' \\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus), |
479 | ' \"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark), |
480 | ' \'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe), |
481 | _ if args.escape_grapheme_extended && self.is_grapheme_extended() => { |
482 | EscapeDebug::unicode(self) |
483 | } |
484 | _ if is_printable(self) => EscapeDebug::printable(self), |
485 | _ => EscapeDebug::unicode(self), |
486 | } |
487 | } |
488 | |
489 | /// Returns an iterator that yields the literal escape code of a character |
490 | /// as `char`s. |
491 | /// |
492 | /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations |
493 | /// of `str` or `char`. |
494 | /// |
495 | /// # Examples |
496 | /// |
497 | /// As an iterator: |
498 | /// |
499 | /// ``` |
/// for c in '\n'.escape_debug() {
501 | /// print!("{c}" ); |
502 | /// } |
503 | /// println!(); |
504 | /// ``` |
505 | /// |
506 | /// Using `println!` directly: |
507 | /// |
508 | /// ``` |
/// println!("{}", '\n'.escape_debug());
510 | /// ``` |
511 | /// |
512 | /// Both are equivalent to: |
513 | /// |
514 | /// ``` |
/// println!("\\n");
516 | /// ``` |
517 | /// |
518 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
519 | /// |
520 | /// ``` |
/// assert_eq!('\n'.escape_debug().to_string(), "\\n");
522 | /// ``` |
523 | #[must_use = "this returns the escaped char as an iterator, \ |
524 | without modifying the original" ] |
525 | #[stable (feature = "char_escape_debug" , since = "1.20.0" )] |
526 | #[inline ] |
pub fn escape_debug(self) -> EscapeDebug {
    // Public entry point for `escape_debug_ext`, called with the `ESCAPE_ALL`
    // preset of `EscapeDebugExtArgs`.
    // NOTE(review): `ESCAPE_ALL` is defined outside this view; presumably it
    // turns on all three optional escapes (grapheme-extend, single quote,
    // double quote) — confirm at its definition.
    self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
}
530 | |
531 | /// Returns an iterator that yields the literal escape code of a character |
532 | /// as `char`s. |
533 | /// |
534 | /// The default is chosen with a bias toward producing literals that are |
535 | /// legal in a variety of languages, including C++11 and similar C-family |
536 | /// languages. The exact rules are: |
537 | /// |
538 | /// * Tab is escaped as `\t`. |
539 | /// * Carriage return is escaped as `\r`. |
540 | /// * Line feed is escaped as `\n`. |
541 | /// * Single quote is escaped as `\'`. |
542 | /// * Double quote is escaped as `\"`. |
543 | /// * Backslash is escaped as `\\`. |
544 | /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e` |
545 | /// inclusive is not escaped. |
546 | /// * All other characters are given hexadecimal Unicode escapes; see |
547 | /// [`escape_unicode`]. |
548 | /// |
549 | /// [`escape_unicode`]: #method.escape_unicode |
550 | /// |
551 | /// # Examples |
552 | /// |
553 | /// As an iterator: |
554 | /// |
555 | /// ``` |
556 | /// for c in '"' .escape_default() { |
557 | /// print!("{c}" ); |
558 | /// } |
559 | /// println!(); |
560 | /// ``` |
561 | /// |
562 | /// Using `println!` directly: |
563 | /// |
564 | /// ``` |
565 | /// println!("{}" , '"' .escape_default()); |
566 | /// ``` |
567 | /// |
568 | /// Both are equivalent to: |
569 | /// |
570 | /// ``` |
/// println!("\\\"");
572 | /// ``` |
573 | /// |
574 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
575 | /// |
576 | /// ``` |
/// assert_eq!('"'.escape_default().to_string(), "\\\"");
578 | /// ``` |
579 | #[must_use = "this returns the escaped char as an iterator, \ |
580 | without modifying the original" ] |
581 | #[stable (feature = "rust1" , since = "1.0.0" )] |
582 | #[inline ] |
583 | pub fn escape_default(self) -> EscapeDefault { |
584 | match self { |
585 | ' \t' => EscapeDefault::backslash(ascii::Char::SmallT), |
586 | ' \r' => EscapeDefault::backslash(ascii::Char::SmallR), |
587 | ' \n' => EscapeDefault::backslash(ascii::Char::SmallN), |
588 | ' \\' | ' \'' | ' \"' => EscapeDefault::backslash(self.as_ascii().unwrap()), |
589 | ' \x20' ..=' \x7e' => EscapeDefault::printable(self.as_ascii().unwrap()), |
590 | _ => EscapeDefault::unicode(self), |
591 | } |
592 | } |
593 | |
594 | /// Returns the number of bytes this `char` would need if encoded in UTF-8. |
595 | /// |
596 | /// That number of bytes is always between 1 and 4, inclusive. |
597 | /// |
598 | /// # Examples |
599 | /// |
600 | /// Basic usage: |
601 | /// |
602 | /// ``` |
603 | /// let len = 'A' .len_utf8(); |
604 | /// assert_eq!(len, 1); |
605 | /// |
606 | /// let len = 'ß' .len_utf8(); |
607 | /// assert_eq!(len, 2); |
608 | /// |
609 | /// let len = 'ℝ' .len_utf8(); |
610 | /// assert_eq!(len, 3); |
611 | /// |
612 | /// let len = '💣' .len_utf8(); |
613 | /// assert_eq!(len, 4); |
614 | /// ``` |
615 | /// |
616 | /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it |
617 | /// would take if each code point was represented as a `char` vs in the `&str` itself: |
618 | /// |
619 | /// ``` |
620 | /// // as chars |
621 | /// let eastern = '東' ; |
622 | /// let capital = '京' ; |
623 | /// |
624 | /// // both can be represented as three bytes |
625 | /// assert_eq!(3, eastern.len_utf8()); |
626 | /// assert_eq!(3, capital.len_utf8()); |
627 | /// |
628 | /// // as a &str, these two are encoded in UTF-8 |
629 | /// let tokyo = "東京" ; |
630 | /// |
631 | /// let len = eastern.len_utf8() + capital.len_utf8(); |
632 | /// |
633 | /// // we can see that they take six bytes total... |
634 | /// assert_eq!(6, tokyo.len()); |
635 | /// |
636 | /// // ... just like the &str |
637 | /// assert_eq!(len, tokyo.len()); |
638 | /// ``` |
639 | #[stable (feature = "rust1" , since = "1.0.0" )] |
640 | #[rustc_const_stable (feature = "const_char_len_utf" , since = "1.52.0" )] |
641 | #[inline ] |
642 | #[must_use ] |
pub const fn len_utf8(self) -> usize {
    // Calls the free function `len_utf8` (no receiver, so not this method),
    // which works on the raw `u32` code point; its definition is outside this view.
    len_utf8(self as u32)
}
646 | |
647 | /// Returns the number of 16-bit code units this `char` would need if |
648 | /// encoded in UTF-16. |
649 | /// |
650 | /// That number of code units is always either 1 or 2, for unicode scalar values in |
651 | /// the [basic multilingual plane] or [supplementary planes] respectively. |
652 | /// |
653 | /// See the documentation for [`len_utf8()`] for more explanation of this |
654 | /// concept. This function is a mirror, but for UTF-16 instead of UTF-8. |
655 | /// |
656 | /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane |
657 | /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes |
658 | /// [`len_utf8()`]: #method.len_utf8 |
659 | /// |
660 | /// # Examples |
661 | /// |
662 | /// Basic usage: |
663 | /// |
664 | /// ``` |
665 | /// let n = 'ß' .len_utf16(); |
666 | /// assert_eq!(n, 1); |
667 | /// |
668 | /// let len = '💣' .len_utf16(); |
669 | /// assert_eq!(len, 2); |
670 | /// ``` |
671 | #[stable (feature = "rust1" , since = "1.0.0" )] |
672 | #[rustc_const_stable (feature = "const_char_len_utf" , since = "1.52.0" )] |
673 | #[inline ] |
674 | #[must_use ] |
pub const fn len_utf16(self) -> usize {
    // Calls the free function `len_utf16` (no receiver, so not this method),
    // which works on the raw `u32` code point; its definition is outside this view.
    len_utf16(self as u32)
}
678 | |
679 | /// Encodes this character as UTF-8 into the provided byte buffer, |
680 | /// and then returns the subslice of the buffer that contains the encoded character. |
681 | /// |
682 | /// # Panics |
683 | /// |
684 | /// Panics if the buffer is not large enough. |
685 | /// A buffer of length four is large enough to encode any `char`. |
686 | /// |
687 | /// # Examples |
688 | /// |
689 | /// In both of these examples, 'ß' takes two bytes to encode. |
690 | /// |
691 | /// ``` |
692 | /// let mut b = [0; 2]; |
693 | /// |
694 | /// let result = 'ß' .encode_utf8(&mut b); |
695 | /// |
696 | /// assert_eq!(result, "ß" ); |
697 | /// |
698 | /// assert_eq!(result.len(), 2); |
699 | /// ``` |
700 | /// |
701 | /// A buffer that's too small: |
702 | /// |
703 | /// ```should_panic |
704 | /// let mut b = [0; 1]; |
705 | /// |
706 | /// // this panics |
707 | /// 'ß' .encode_utf8(&mut b); |
708 | /// ``` |
709 | #[stable (feature = "unicode_encode_char" , since = "1.15.0" )] |
710 | #[rustc_const_stable (feature = "const_char_encode_utf8" , since = "1.83.0" )] |
711 | #[inline ] |
pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
    // `encode_utf8_raw` (defined outside this view) writes the encoding of the
    // code point into `dst` and hands back a byte slice, which is then
    // reinterpreted as `str` without re-validation.
    // NOTE(review): the documented panic on a too-small buffer presumably
    // originates in `encode_utf8_raw` — this body performs no length check.
    // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
    unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
}
716 | |
717 | /// Encodes this character as native endian UTF-16 into the provided `u16` buffer, |
718 | /// and then returns the subslice of the buffer that contains the encoded character. |
719 | /// |
720 | /// # Panics |
721 | /// |
722 | /// Panics if the buffer is not large enough. |
723 | /// A buffer of length 2 is large enough to encode any `char`. |
724 | /// |
725 | /// # Examples |
726 | /// |
727 | /// In both of these examples, '𝕊' takes two `u16`s to encode. |
728 | /// |
729 | /// ``` |
730 | /// let mut b = [0; 2]; |
731 | /// |
732 | /// let result = '𝕊' .encode_utf16(&mut b); |
733 | /// |
734 | /// assert_eq!(result.len(), 2); |
735 | /// ``` |
736 | /// |
737 | /// A buffer that's too small: |
738 | /// |
739 | /// ```should_panic |
740 | /// let mut b = [0; 1]; |
741 | /// |
742 | /// // this panics |
743 | /// '𝕊' .encode_utf16(&mut b); |
744 | /// ``` |
745 | #[stable (feature = "unicode_encode_char" , since = "1.15.0" )] |
746 | #[rustc_const_stable (feature = "const_char_encode_utf16" , since = "1.84.0" )] |
747 | #[inline ] |
pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
    // Forwards the raw code point and the caller's buffer to `encode_utf16_raw`
    // (defined outside this view) and returns its result unchanged.
    // NOTE(review): the documented panic on a too-small buffer presumably
    // originates there — this body performs no length check.
    encode_utf16_raw(self as u32, dst)
}
751 | |
752 | /// Returns `true` if this `char` has the `Alphabetic` property. |
753 | /// |
754 | /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
755 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
756 | /// |
757 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
758 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
759 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
760 | /// |
761 | /// # Examples |
762 | /// |
763 | /// Basic usage: |
764 | /// |
765 | /// ``` |
766 | /// assert!('a' .is_alphabetic()); |
767 | /// assert!('京' .is_alphabetic()); |
768 | /// |
769 | /// let c = '💝' ; |
770 | /// // love is many things, but it is not alphabetic |
771 | /// assert!(!c.is_alphabetic()); |
772 | /// ``` |
773 | #[must_use ] |
774 | #[stable (feature = "rust1" , since = "1.0.0" )] |
775 | #[inline ] |
776 | pub fn is_alphabetic(self) -> bool { |
777 | match self { |
778 | 'a' ..='z' | 'A' ..='Z' => true, |
779 | c => c > ' \x7f' && unicode::Alphabetic(c), |
780 | } |
781 | } |
782 | |
783 | /// Returns `true` if this `char` has the `Lowercase` property. |
784 | /// |
785 | /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
786 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
787 | /// |
788 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
789 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
790 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
791 | /// |
792 | /// # Examples |
793 | /// |
794 | /// Basic usage: |
795 | /// |
796 | /// ``` |
797 | /// assert!('a' .is_lowercase()); |
798 | /// assert!('δ' .is_lowercase()); |
799 | /// assert!(!'A' .is_lowercase()); |
800 | /// assert!(!'Δ' .is_lowercase()); |
801 | /// |
802 | /// // The various Chinese scripts and punctuation do not have case, and so: |
803 | /// assert!(!'中' .is_lowercase()); |
804 | /// assert!(!' ' .is_lowercase()); |
805 | /// ``` |
806 | /// |
807 | /// In a const context: |
808 | /// |
809 | /// ``` |
810 | /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ' .is_lowercase(); |
811 | /// assert!(!CAPITAL_DELTA_IS_LOWERCASE); |
812 | /// ``` |
813 | #[must_use ] |
814 | #[stable (feature = "rust1" , since = "1.0.0" )] |
815 | #[rustc_const_stable (feature = "const_unicode_case_lookup" , since = "1.84.0" )] |
816 | #[inline ] |
817 | pub const fn is_lowercase(self) -> bool { |
818 | match self { |
819 | 'a' ..='z' => true, |
820 | c => c > ' \x7f' && unicode::Lowercase(c), |
821 | } |
822 | } |
823 | |
824 | /// Returns `true` if this `char` has the `Uppercase` property. |
825 | /// |
826 | /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
827 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
828 | /// |
829 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
830 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
831 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
832 | /// |
833 | /// # Examples |
834 | /// |
835 | /// Basic usage: |
836 | /// |
837 | /// ``` |
838 | /// assert!(!'a' .is_uppercase()); |
839 | /// assert!(!'δ' .is_uppercase()); |
840 | /// assert!('A' .is_uppercase()); |
841 | /// assert!('Δ' .is_uppercase()); |
842 | /// |
843 | /// // The various Chinese scripts and punctuation do not have case, and so: |
844 | /// assert!(!'中' .is_uppercase()); |
845 | /// assert!(!' ' .is_uppercase()); |
846 | /// ``` |
847 | /// |
848 | /// In a const context: |
849 | /// |
850 | /// ``` |
851 | /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ' .is_uppercase(); |
852 | /// assert!(CAPITAL_DELTA_IS_UPPERCASE); |
853 | /// ``` |
854 | #[must_use ] |
855 | #[stable (feature = "rust1" , since = "1.0.0" )] |
856 | #[rustc_const_stable (feature = "const_unicode_case_lookup" , since = "1.84.0" )] |
857 | #[inline ] |
858 | pub const fn is_uppercase(self) -> bool { |
859 | match self { |
860 | 'A' ..='Z' => true, |
861 | c => c > ' \x7f' && unicode::Uppercase(c), |
862 | } |
863 | } |
864 | |
865 | /// Returns `true` if this `char` has the `White_Space` property. |
866 | /// |
867 | /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`]. |
868 | /// |
869 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
870 | /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
871 | /// |
872 | /// # Examples |
873 | /// |
874 | /// Basic usage: |
875 | /// |
876 | /// ``` |
877 | /// assert!(' ' .is_whitespace()); |
878 | /// |
879 | /// // line break |
/// assert!('\n'.is_whitespace());
881 | /// |
882 | /// // a non-breaking space |
/// assert!('\u{A0}'.is_whitespace());
884 | /// |
885 | /// assert!(!'越' .is_whitespace()); |
886 | /// ``` |
887 | #[must_use ] |
888 | #[stable (feature = "rust1" , since = "1.0.0" )] |
889 | #[rustc_const_stable (feature = "const_char_classify" , since = "1.87.0" )] |
890 | #[inline ] |
891 | pub const fn is_whitespace(self) -> bool { |
892 | match self { |
893 | ' ' | ' \x09' ..=' \x0d' => true, |
894 | c => c > ' \x7f' && unicode::White_Space(c), |
895 | } |
896 | } |
897 | |
898 | /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`]. |
899 | /// |
900 | /// [`is_alphabetic()`]: #method.is_alphabetic |
901 | /// [`is_numeric()`]: #method.is_numeric |
902 | /// |
903 | /// # Examples |
904 | /// |
905 | /// Basic usage: |
906 | /// |
907 | /// ``` |
908 | /// assert!('٣' .is_alphanumeric()); |
909 | /// assert!('7' .is_alphanumeric()); |
910 | /// assert!('৬' .is_alphanumeric()); |
911 | /// assert!('¾' .is_alphanumeric()); |
912 | /// assert!('①' .is_alphanumeric()); |
913 | /// assert!('K' .is_alphanumeric()); |
914 | /// assert!('و' .is_alphanumeric()); |
915 | /// assert!('藏' .is_alphanumeric()); |
916 | /// ``` |
917 | #[must_use ] |
918 | #[stable (feature = "rust1" , since = "1.0.0" )] |
919 | #[inline ] |
920 | pub fn is_alphanumeric(self) -> bool { |
921 | self.is_alphabetic() || self.is_numeric() |
922 | } |
923 | |
924 | /// Returns `true` if this `char` has the general category for control codes. |
925 | /// |
926 | /// Control codes (code points with the general category of `Cc`) are described in Chapter 4 |
927 | /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character |
928 | /// Database][ucd] [`UnicodeData.txt`]. |
929 | /// |
930 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
931 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
932 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
933 | /// |
934 | /// # Examples |
935 | /// |
936 | /// Basic usage: |
937 | /// |
938 | /// ``` |
939 | /// // U+009C, STRING TERMINATOR |
/// assert!('\u{9C}'.is_control());
941 | /// assert!(!'q' .is_control()); |
942 | /// ``` |
943 | #[must_use ] |
944 | #[stable (feature = "rust1" , since = "1.0.0" )] |
945 | #[inline ] |
946 | pub fn is_control(self) -> bool { |
947 | unicode::Cc(self) |
948 | } |
949 | |
950 | /// Returns `true` if this `char` has the `Grapheme_Extend` property. |
951 | /// |
952 | /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text |
953 | /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd] |
954 | /// [`DerivedCoreProperties.txt`]. |
955 | /// |
956 | /// [uax29]: https://www.unicode.org/reports/tr29/ |
957 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
958 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
959 | #[must_use ] |
960 | #[inline ] |
961 | pub(crate) fn is_grapheme_extended(self) -> bool { |
962 | unicode::Grapheme_Extend(self) |
963 | } |
964 | |
965 | /// Returns `true` if this `char` has one of the general categories for numbers. |
966 | /// |
967 | /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric |
968 | /// characters, and `No` for other numeric characters) are specified in the [Unicode Character |
969 | /// Database][ucd] [`UnicodeData.txt`]. |
970 | /// |
971 | /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'. |
972 | /// If you want everything including characters with overlapping purposes then you might want to use |
973 | /// a unicode or language-processing library that exposes the appropriate character properties instead |
974 | /// of looking at the unicode categories. |
975 | /// |
976 | /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use |
977 | /// `is_ascii_digit` or `is_digit` instead. |
978 | /// |
979 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
980 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
981 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
982 | /// |
983 | /// # Examples |
984 | /// |
985 | /// Basic usage: |
986 | /// |
987 | /// ``` |
988 | /// assert!('٣' .is_numeric()); |
989 | /// assert!('7' .is_numeric()); |
990 | /// assert!('৬' .is_numeric()); |
991 | /// assert!('¾' .is_numeric()); |
992 | /// assert!('①' .is_numeric()); |
993 | /// assert!(!'K' .is_numeric()); |
994 | /// assert!(!'و' .is_numeric()); |
995 | /// assert!(!'藏' .is_numeric()); |
996 | /// assert!(!'三' .is_numeric()); |
997 | /// ``` |
998 | #[must_use ] |
999 | #[stable (feature = "rust1" , since = "1.0.0" )] |
1000 | #[inline ] |
1001 | pub fn is_numeric(self) -> bool { |
1002 | match self { |
1003 | '0' ..='9' => true, |
1004 | c => c > ' \x7f' && unicode::N(c), |
1005 | } |
1006 | } |
1007 | |
1008 | /// Returns an iterator that yields the lowercase mapping of this `char` as one or more |
1009 | /// `char`s. |
1010 | /// |
1011 | /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`. |
1012 | /// |
1013 | /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character |
1014 | /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. |
1015 | /// |
1016 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
1017 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
1018 | /// |
1019 | /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields |
1020 | /// the `char`(s) given by [`SpecialCasing.txt`]. |
1021 | /// |
1022 | /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt |
1023 | /// |
1024 | /// This operation performs an unconditional mapping without tailoring. That is, the conversion |
1025 | /// is independent of context and language. |
1026 | /// |
1027 | /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in |
1028 | /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion. |
1029 | /// |
1030 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
1031 | /// |
1032 | /// # Examples |
1033 | /// |
1034 | /// As an iterator: |
1035 | /// |
1036 | /// ``` |
1037 | /// for c in 'İ' .to_lowercase() { |
1038 | /// print!("{c}" ); |
1039 | /// } |
1040 | /// println!(); |
1041 | /// ``` |
1042 | /// |
1043 | /// Using `println!` directly: |
1044 | /// |
1045 | /// ``` |
1046 | /// println!("{}" , 'İ' .to_lowercase()); |
1047 | /// ``` |
1048 | /// |
1049 | /// Both are equivalent to: |
1050 | /// |
1051 | /// ``` |
/// println!("i\u{307}");
1053 | /// ``` |
1054 | /// |
1055 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
1056 | /// |
1057 | /// ``` |
1058 | /// assert_eq!('C' .to_lowercase().to_string(), "c" ); |
1059 | /// |
1060 | /// // Sometimes the result is more than one character: |
/// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1062 | /// |
1063 | /// // Characters that do not have both uppercase and lowercase |
1064 | /// // convert into themselves. |
1065 | /// assert_eq!('山' .to_lowercase().to_string(), "山" ); |
1066 | /// ``` |
1067 | #[must_use = "this returns the lowercase character as a new iterator, \ |
1068 | without modifying the original" ] |
1069 | #[stable (feature = "rust1" , since = "1.0.0" )] |
1070 | #[inline ] |
1071 | pub fn to_lowercase(self) -> ToLowercase { |
1072 | ToLowercase(CaseMappingIter::new(conversions::to_lower(self))) |
1073 | } |
1074 | |
1075 | /// Returns an iterator that yields the uppercase mapping of this `char` as one or more |
1076 | /// `char`s. |
1077 | /// |
1078 | /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`. |
1079 | /// |
1080 | /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character |
1081 | /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. |
1082 | /// |
1083 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
1084 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
1085 | /// |
1086 | /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields |
1087 | /// the `char`(s) given by [`SpecialCasing.txt`]. |
1088 | /// |
1089 | /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt |
1090 | /// |
1091 | /// This operation performs an unconditional mapping without tailoring. That is, the conversion |
1092 | /// is independent of context and language. |
1093 | /// |
1094 | /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in |
1095 | /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion. |
1096 | /// |
1097 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
1098 | /// |
1099 | /// # Examples |
1100 | /// |
1101 | /// As an iterator: |
1102 | /// |
1103 | /// ``` |
1104 | /// for c in 'ß' .to_uppercase() { |
1105 | /// print!("{c}" ); |
1106 | /// } |
1107 | /// println!(); |
1108 | /// ``` |
1109 | /// |
1110 | /// Using `println!` directly: |
1111 | /// |
1112 | /// ``` |
1113 | /// println!("{}" , 'ß' .to_uppercase()); |
1114 | /// ``` |
1115 | /// |
1116 | /// Both are equivalent to: |
1117 | /// |
1118 | /// ``` |
1119 | /// println!("SS" ); |
1120 | /// ``` |
1121 | /// |
1122 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
1123 | /// |
1124 | /// ``` |
1125 | /// assert_eq!('c' .to_uppercase().to_string(), "C" ); |
1126 | /// |
1127 | /// // Sometimes the result is more than one character: |
1128 | /// assert_eq!('ß' .to_uppercase().to_string(), "SS" ); |
1129 | /// |
1130 | /// // Characters that do not have both uppercase and lowercase |
1131 | /// // convert into themselves. |
1132 | /// assert_eq!('山' .to_uppercase().to_string(), "山" ); |
1133 | /// ``` |
1134 | /// |
1135 | /// # Note on locale |
1136 | /// |
1137 | /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two: |
1138 | /// |
1139 | /// * 'Dotless': I / ı, sometimes written ï |
1140 | /// * 'Dotted': İ / i |
1141 | /// |
1142 | /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore: |
1143 | /// |
1144 | /// ``` |
1145 | /// let upper_i = 'i' .to_uppercase().to_string(); |
1146 | /// ``` |
1147 | /// |
1148 | /// The value of `upper_i` here relies on the language of the text: if we're |
1149 | /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should |
1150 | /// be `"İ"`. `to_uppercase()` does not take this into account, and so: |
1151 | /// |
1152 | /// ``` |
1153 | /// let upper_i = 'i' .to_uppercase().to_string(); |
1154 | /// |
1155 | /// assert_eq!(upper_i, "I" ); |
1156 | /// ``` |
1157 | /// |
1158 | /// holds across languages. |
1159 | #[must_use = "this returns the uppercase character as a new iterator, \ |
1160 | without modifying the original" ] |
1161 | #[stable (feature = "rust1" , since = "1.0.0" )] |
1162 | #[inline ] |
1163 | pub fn to_uppercase(self) -> ToUppercase { |
1164 | ToUppercase(CaseMappingIter::new(conversions::to_upper(self))) |
1165 | } |
1166 | |
1167 | /// Checks if the value is within the ASCII range. |
1168 | /// |
1169 | /// # Examples |
1170 | /// |
1171 | /// ``` |
1172 | /// let ascii = 'a' ; |
1173 | /// let non_ascii = '❤' ; |
1174 | /// |
1175 | /// assert!(ascii.is_ascii()); |
1176 | /// assert!(!non_ascii.is_ascii()); |
1177 | /// ``` |
1178 | #[must_use ] |
1179 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1180 | #[rustc_const_stable (feature = "const_char_is_ascii" , since = "1.32.0" )] |
1181 | #[rustc_diagnostic_item = "char_is_ascii" ] |
1182 | #[inline ] |
1183 | pub const fn is_ascii(&self) -> bool { |
1184 | *self as u32 <= 0x7F |
1185 | } |
1186 | |
1187 | /// Returns `Some` if the value is within the ASCII range, |
1188 | /// or `None` if it's not. |
1189 | /// |
1190 | /// This is preferred to [`Self::is_ascii`] when you're passing the value |
1191 | /// along to something else that can take [`ascii::Char`] rather than |
1192 | /// needing to check again for itself whether the value is in ASCII. |
1193 | #[must_use ] |
1194 | #[unstable (feature = "ascii_char" , issue = "110998" )] |
1195 | #[inline ] |
1196 | pub const fn as_ascii(&self) -> Option<ascii::Char> { |
1197 | if self.is_ascii() { |
1198 | // SAFETY: Just checked that this is ASCII. |
1199 | Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) }) |
1200 | } else { |
1201 | None |
1202 | } |
1203 | } |
1204 | |
1205 | /// Makes a copy of the value in its ASCII upper case equivalent. |
1206 | /// |
1207 | /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', |
1208 | /// but non-ASCII letters are unchanged. |
1209 | /// |
1210 | /// To uppercase the value in-place, use [`make_ascii_uppercase()`]. |
1211 | /// |
1212 | /// To uppercase ASCII characters in addition to non-ASCII characters, use |
1213 | /// [`to_uppercase()`]. |
1214 | /// |
1215 | /// # Examples |
1216 | /// |
1217 | /// ``` |
1218 | /// let ascii = 'a' ; |
1219 | /// let non_ascii = '❤' ; |
1220 | /// |
1221 | /// assert_eq!('A' , ascii.to_ascii_uppercase()); |
1222 | /// assert_eq!('❤' , non_ascii.to_ascii_uppercase()); |
1223 | /// ``` |
1224 | /// |
1225 | /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase |
1226 | /// [`to_uppercase()`]: #method.to_uppercase |
1227 | #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`" ] |
1228 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1229 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
1230 | #[inline ] |
1231 | pub const fn to_ascii_uppercase(&self) -> char { |
1232 | if self.is_ascii_lowercase() { |
1233 | (*self as u8).ascii_change_case_unchecked() as char |
1234 | } else { |
1235 | *self |
1236 | } |
1237 | } |
1238 | |
1239 | /// Makes a copy of the value in its ASCII lower case equivalent. |
1240 | /// |
1241 | /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', |
1242 | /// but non-ASCII letters are unchanged. |
1243 | /// |
1244 | /// To lowercase the value in-place, use [`make_ascii_lowercase()`]. |
1245 | /// |
1246 | /// To lowercase ASCII characters in addition to non-ASCII characters, use |
1247 | /// [`to_lowercase()`]. |
1248 | /// |
1249 | /// # Examples |
1250 | /// |
1251 | /// ``` |
1252 | /// let ascii = 'A' ; |
1253 | /// let non_ascii = '❤' ; |
1254 | /// |
1255 | /// assert_eq!('a' , ascii.to_ascii_lowercase()); |
1256 | /// assert_eq!('❤' , non_ascii.to_ascii_lowercase()); |
1257 | /// ``` |
1258 | /// |
1259 | /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase |
1260 | /// [`to_lowercase()`]: #method.to_lowercase |
1261 | #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`" ] |
1262 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1263 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
1264 | #[inline ] |
1265 | pub const fn to_ascii_lowercase(&self) -> char { |
1266 | if self.is_ascii_uppercase() { |
1267 | (*self as u8).ascii_change_case_unchecked() as char |
1268 | } else { |
1269 | *self |
1270 | } |
1271 | } |
1272 | |
1273 | /// Checks that two values are an ASCII case-insensitive match. |
1274 | /// |
1275 | /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>. |
1276 | /// |
1277 | /// # Examples |
1278 | /// |
1279 | /// ``` |
1280 | /// let upper_a = 'A' ; |
1281 | /// let lower_a = 'a' ; |
1282 | /// let lower_z = 'z' ; |
1283 | /// |
1284 | /// assert!(upper_a.eq_ignore_ascii_case(&lower_a)); |
1285 | /// assert!(upper_a.eq_ignore_ascii_case(&upper_a)); |
1286 | /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z)); |
1287 | /// ``` |
1288 | /// |
1289 | /// [to_ascii_lowercase]: #method.to_ascii_lowercase |
1290 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1291 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
1292 | #[inline ] |
1293 | pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool { |
1294 | self.to_ascii_lowercase() == other.to_ascii_lowercase() |
1295 | } |
1296 | |
1297 | /// Converts this type to its ASCII upper case equivalent in-place. |
1298 | /// |
1299 | /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', |
1300 | /// but non-ASCII letters are unchanged. |
1301 | /// |
1302 | /// To return a new uppercased value without modifying the existing one, use |
1303 | /// [`to_ascii_uppercase()`]. |
1304 | /// |
1305 | /// # Examples |
1306 | /// |
1307 | /// ``` |
1308 | /// let mut ascii = 'a' ; |
1309 | /// |
1310 | /// ascii.make_ascii_uppercase(); |
1311 | /// |
1312 | /// assert_eq!('A' , ascii); |
1313 | /// ``` |
1314 | /// |
1315 | /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase |
1316 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1317 | #[rustc_const_stable (feature = "const_make_ascii" , since = "1.84.0" )] |
1318 | #[inline ] |
1319 | pub const fn make_ascii_uppercase(&mut self) { |
1320 | *self = self.to_ascii_uppercase(); |
1321 | } |
1322 | |
1323 | /// Converts this type to its ASCII lower case equivalent in-place. |
1324 | /// |
1325 | /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', |
1326 | /// but non-ASCII letters are unchanged. |
1327 | /// |
1328 | /// To return a new lowercased value without modifying the existing one, use |
1329 | /// [`to_ascii_lowercase()`]. |
1330 | /// |
1331 | /// # Examples |
1332 | /// |
1333 | /// ``` |
1334 | /// let mut ascii = 'A' ; |
1335 | /// |
1336 | /// ascii.make_ascii_lowercase(); |
1337 | /// |
1338 | /// assert_eq!('a' , ascii); |
1339 | /// ``` |
1340 | /// |
1341 | /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase |
1342 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1343 | #[rustc_const_stable (feature = "const_make_ascii" , since = "1.84.0" )] |
1344 | #[inline ] |
1345 | pub const fn make_ascii_lowercase(&mut self) { |
1346 | *self = self.to_ascii_lowercase(); |
1347 | } |
1348 | |
1349 | /// Checks if the value is an ASCII alphabetic character: |
1350 | /// |
1351 | /// - U+0041 'A' ..= U+005A 'Z', or |
1352 | /// - U+0061 'a' ..= U+007A 'z'. |
1353 | /// |
1354 | /// # Examples |
1355 | /// |
1356 | /// ``` |
1357 | /// let uppercase_a = 'A' ; |
1358 | /// let uppercase_g = 'G' ; |
1359 | /// let a = 'a' ; |
1360 | /// let g = 'g' ; |
1361 | /// let zero = '0' ; |
1362 | /// let percent = '%' ; |
1363 | /// let space = ' ' ; |
1364 | /// let lf = ' \n' ; |
1365 | /// let esc = ' \x1b' ; |
1366 | /// |
1367 | /// assert!(uppercase_a.is_ascii_alphabetic()); |
1368 | /// assert!(uppercase_g.is_ascii_alphabetic()); |
1369 | /// assert!(a.is_ascii_alphabetic()); |
1370 | /// assert!(g.is_ascii_alphabetic()); |
1371 | /// assert!(!zero.is_ascii_alphabetic()); |
1372 | /// assert!(!percent.is_ascii_alphabetic()); |
1373 | /// assert!(!space.is_ascii_alphabetic()); |
1374 | /// assert!(!lf.is_ascii_alphabetic()); |
1375 | /// assert!(!esc.is_ascii_alphabetic()); |
1376 | /// ``` |
1377 | #[must_use ] |
1378 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1379 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1380 | #[inline ] |
1381 | pub const fn is_ascii_alphabetic(&self) -> bool { |
1382 | matches!(*self, 'A' ..='Z' | 'a' ..='z' ) |
1383 | } |
1384 | |
1385 | /// Checks if the value is an ASCII uppercase character: |
1386 | /// U+0041 'A' ..= U+005A 'Z'. |
1387 | /// |
1388 | /// # Examples |
1389 | /// |
1390 | /// ``` |
1391 | /// let uppercase_a = 'A' ; |
1392 | /// let uppercase_g = 'G' ; |
1393 | /// let a = 'a' ; |
1394 | /// let g = 'g' ; |
1395 | /// let zero = '0' ; |
1396 | /// let percent = '%' ; |
1397 | /// let space = ' ' ; |
1398 | /// let lf = ' \n' ; |
1399 | /// let esc = ' \x1b' ; |
1400 | /// |
1401 | /// assert!(uppercase_a.is_ascii_uppercase()); |
1402 | /// assert!(uppercase_g.is_ascii_uppercase()); |
1403 | /// assert!(!a.is_ascii_uppercase()); |
1404 | /// assert!(!g.is_ascii_uppercase()); |
1405 | /// assert!(!zero.is_ascii_uppercase()); |
1406 | /// assert!(!percent.is_ascii_uppercase()); |
1407 | /// assert!(!space.is_ascii_uppercase()); |
1408 | /// assert!(!lf.is_ascii_uppercase()); |
1409 | /// assert!(!esc.is_ascii_uppercase()); |
1410 | /// ``` |
1411 | #[must_use ] |
1412 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1413 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1414 | #[inline ] |
1415 | pub const fn is_ascii_uppercase(&self) -> bool { |
1416 | matches!(*self, 'A' ..='Z' ) |
1417 | } |
1418 | |
1419 | /// Checks if the value is an ASCII lowercase character: |
1420 | /// U+0061 'a' ..= U+007A 'z'. |
1421 | /// |
1422 | /// # Examples |
1423 | /// |
1424 | /// ``` |
1425 | /// let uppercase_a = 'A' ; |
1426 | /// let uppercase_g = 'G' ; |
1427 | /// let a = 'a' ; |
1428 | /// let g = 'g' ; |
1429 | /// let zero = '0' ; |
1430 | /// let percent = '%' ; |
1431 | /// let space = ' ' ; |
1432 | /// let lf = ' \n' ; |
1433 | /// let esc = ' \x1b' ; |
1434 | /// |
1435 | /// assert!(!uppercase_a.is_ascii_lowercase()); |
1436 | /// assert!(!uppercase_g.is_ascii_lowercase()); |
1437 | /// assert!(a.is_ascii_lowercase()); |
1438 | /// assert!(g.is_ascii_lowercase()); |
1439 | /// assert!(!zero.is_ascii_lowercase()); |
1440 | /// assert!(!percent.is_ascii_lowercase()); |
1441 | /// assert!(!space.is_ascii_lowercase()); |
1442 | /// assert!(!lf.is_ascii_lowercase()); |
1443 | /// assert!(!esc.is_ascii_lowercase()); |
1444 | /// ``` |
1445 | #[must_use ] |
1446 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1447 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1448 | #[inline ] |
1449 | pub const fn is_ascii_lowercase(&self) -> bool { |
1450 | matches!(*self, 'a' ..='z' ) |
1451 | } |
1452 | |
1453 | /// Checks if the value is an ASCII alphanumeric character: |
1454 | /// |
1455 | /// - U+0041 'A' ..= U+005A 'Z', or |
1456 | /// - U+0061 'a' ..= U+007A 'z', or |
1457 | /// - U+0030 '0' ..= U+0039 '9'. |
1458 | /// |
1459 | /// # Examples |
1460 | /// |
1461 | /// ``` |
1462 | /// let uppercase_a = 'A' ; |
1463 | /// let uppercase_g = 'G' ; |
1464 | /// let a = 'a' ; |
1465 | /// let g = 'g' ; |
1466 | /// let zero = '0' ; |
1467 | /// let percent = '%' ; |
1468 | /// let space = ' ' ; |
1469 | /// let lf = ' \n' ; |
1470 | /// let esc = ' \x1b' ; |
1471 | /// |
1472 | /// assert!(uppercase_a.is_ascii_alphanumeric()); |
1473 | /// assert!(uppercase_g.is_ascii_alphanumeric()); |
1474 | /// assert!(a.is_ascii_alphanumeric()); |
1475 | /// assert!(g.is_ascii_alphanumeric()); |
1476 | /// assert!(zero.is_ascii_alphanumeric()); |
1477 | /// assert!(!percent.is_ascii_alphanumeric()); |
1478 | /// assert!(!space.is_ascii_alphanumeric()); |
1479 | /// assert!(!lf.is_ascii_alphanumeric()); |
1480 | /// assert!(!esc.is_ascii_alphanumeric()); |
1481 | /// ``` |
1482 | #[must_use ] |
1483 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1484 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1485 | #[inline ] |
1486 | pub const fn is_ascii_alphanumeric(&self) -> bool { |
1487 | matches!(*self, '0' ..='9' ) | matches!(*self, 'A' ..='Z' ) | matches!(*self, 'a' ..='z' ) |
1488 | } |
1489 | |
1490 | /// Checks if the value is an ASCII decimal digit: |
1491 | /// U+0030 '0' ..= U+0039 '9'. |
1492 | /// |
1493 | /// # Examples |
1494 | /// |
1495 | /// ``` |
1496 | /// let uppercase_a = 'A' ; |
1497 | /// let uppercase_g = 'G' ; |
1498 | /// let a = 'a' ; |
1499 | /// let g = 'g' ; |
1500 | /// let zero = '0' ; |
1501 | /// let percent = '%' ; |
1502 | /// let space = ' ' ; |
1503 | /// let lf = ' \n' ; |
1504 | /// let esc = ' \x1b' ; |
1505 | /// |
1506 | /// assert!(!uppercase_a.is_ascii_digit()); |
1507 | /// assert!(!uppercase_g.is_ascii_digit()); |
1508 | /// assert!(!a.is_ascii_digit()); |
1509 | /// assert!(!g.is_ascii_digit()); |
1510 | /// assert!(zero.is_ascii_digit()); |
1511 | /// assert!(!percent.is_ascii_digit()); |
1512 | /// assert!(!space.is_ascii_digit()); |
1513 | /// assert!(!lf.is_ascii_digit()); |
1514 | /// assert!(!esc.is_ascii_digit()); |
1515 | /// ``` |
1516 | #[must_use ] |
1517 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1518 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1519 | #[inline ] |
1520 | pub const fn is_ascii_digit(&self) -> bool { |
1521 | matches!(*self, '0' ..='9' ) |
1522 | } |
1523 | |
1524 | /// Checks if the value is an ASCII octal digit: |
1525 | /// U+0030 '0' ..= U+0037 '7'. |
1526 | /// |
1527 | /// # Examples |
1528 | /// |
1529 | /// ``` |
1530 | /// #![feature(is_ascii_octdigit)] |
1531 | /// |
1532 | /// let uppercase_a = 'A' ; |
1533 | /// let a = 'a' ; |
1534 | /// let zero = '0' ; |
1535 | /// let seven = '7' ; |
1536 | /// let nine = '9' ; |
1537 | /// let percent = '%' ; |
1538 | /// let lf = ' \n' ; |
1539 | /// |
1540 | /// assert!(!uppercase_a.is_ascii_octdigit()); |
1541 | /// assert!(!a.is_ascii_octdigit()); |
1542 | /// assert!(zero.is_ascii_octdigit()); |
1543 | /// assert!(seven.is_ascii_octdigit()); |
1544 | /// assert!(!nine.is_ascii_octdigit()); |
1545 | /// assert!(!percent.is_ascii_octdigit()); |
1546 | /// assert!(!lf.is_ascii_octdigit()); |
1547 | /// ``` |
1548 | #[must_use ] |
1549 | #[unstable (feature = "is_ascii_octdigit" , issue = "101288" )] |
1550 | #[inline ] |
1551 | pub const fn is_ascii_octdigit(&self) -> bool { |
1552 | matches!(*self, '0' ..='7' ) |
1553 | } |
1554 | |
1555 | /// Checks if the value is an ASCII hexadecimal digit: |
1556 | /// |
1557 | /// - U+0030 '0' ..= U+0039 '9', or |
1558 | /// - U+0041 'A' ..= U+0046 'F', or |
1559 | /// - U+0061 'a' ..= U+0066 'f'. |
1560 | /// |
1561 | /// # Examples |
1562 | /// |
1563 | /// ``` |
1564 | /// let uppercase_a = 'A' ; |
1565 | /// let uppercase_g = 'G' ; |
1566 | /// let a = 'a' ; |
1567 | /// let g = 'g' ; |
1568 | /// let zero = '0' ; |
1569 | /// let percent = '%' ; |
1570 | /// let space = ' ' ; |
1571 | /// let lf = ' \n' ; |
1572 | /// let esc = ' \x1b' ; |
1573 | /// |
1574 | /// assert!(uppercase_a.is_ascii_hexdigit()); |
1575 | /// assert!(!uppercase_g.is_ascii_hexdigit()); |
1576 | /// assert!(a.is_ascii_hexdigit()); |
1577 | /// assert!(!g.is_ascii_hexdigit()); |
1578 | /// assert!(zero.is_ascii_hexdigit()); |
1579 | /// assert!(!percent.is_ascii_hexdigit()); |
1580 | /// assert!(!space.is_ascii_hexdigit()); |
1581 | /// assert!(!lf.is_ascii_hexdigit()); |
1582 | /// assert!(!esc.is_ascii_hexdigit()); |
1583 | /// ``` |
1584 | #[must_use ] |
1585 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1586 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1587 | #[inline ] |
1588 | pub const fn is_ascii_hexdigit(&self) -> bool { |
1589 | matches!(*self, '0' ..='9' ) | matches!(*self, 'A' ..='F' ) | matches!(*self, 'a' ..='f' ) |
1590 | } |
1591 | |
1592 | /// Checks if the value is an ASCII punctuation character: |
1593 | /// |
1594 | /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or |
1595 | /// - U+003A ..= U+0040 `: ; < = > ? @`, or |
1596 | /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or |
1597 | /// - U+007B ..= U+007E `{ | } ~` |
1598 | /// |
1599 | /// # Examples |
1600 | /// |
1601 | /// ``` |
1602 | /// let uppercase_a = 'A' ; |
1603 | /// let uppercase_g = 'G' ; |
1604 | /// let a = 'a' ; |
1605 | /// let g = 'g' ; |
1606 | /// let zero = '0' ; |
1607 | /// let percent = '%' ; |
1608 | /// let space = ' ' ; |
1609 | /// let lf = ' \n' ; |
1610 | /// let esc = ' \x1b' ; |
1611 | /// |
1612 | /// assert!(!uppercase_a.is_ascii_punctuation()); |
1613 | /// assert!(!uppercase_g.is_ascii_punctuation()); |
1614 | /// assert!(!a.is_ascii_punctuation()); |
1615 | /// assert!(!g.is_ascii_punctuation()); |
1616 | /// assert!(!zero.is_ascii_punctuation()); |
1617 | /// assert!(percent.is_ascii_punctuation()); |
1618 | /// assert!(!space.is_ascii_punctuation()); |
1619 | /// assert!(!lf.is_ascii_punctuation()); |
1620 | /// assert!(!esc.is_ascii_punctuation()); |
1621 | /// ``` |
1622 | #[must_use ] |
1623 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1624 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1625 | #[inline ] |
1626 | pub const fn is_ascii_punctuation(&self) -> bool { |
1627 | matches!(*self, '!' ..='/' ) |
1628 | | matches!(*self, ':' ..='@' ) |
1629 | | matches!(*self, '[' ..='`' ) |
1630 | | matches!(*self, '{' ..='~' ) |
1631 | } |
1632 | |
1633 | /// Checks if the value is an ASCII graphic character: |
1634 | /// U+0021 '!' ..= U+007E '~'. |
1635 | /// |
1636 | /// # Examples |
1637 | /// |
1638 | /// ``` |
1639 | /// let uppercase_a = 'A' ; |
1640 | /// let uppercase_g = 'G' ; |
1641 | /// let a = 'a' ; |
1642 | /// let g = 'g' ; |
1643 | /// let zero = '0' ; |
1644 | /// let percent = '%' ; |
1645 | /// let space = ' ' ; |
1646 | /// let lf = ' \n' ; |
1647 | /// let esc = ' \x1b' ; |
1648 | /// |
1649 | /// assert!(uppercase_a.is_ascii_graphic()); |
1650 | /// assert!(uppercase_g.is_ascii_graphic()); |
1651 | /// assert!(a.is_ascii_graphic()); |
1652 | /// assert!(g.is_ascii_graphic()); |
1653 | /// assert!(zero.is_ascii_graphic()); |
1654 | /// assert!(percent.is_ascii_graphic()); |
1655 | /// assert!(!space.is_ascii_graphic()); |
1656 | /// assert!(!lf.is_ascii_graphic()); |
1657 | /// assert!(!esc.is_ascii_graphic()); |
1658 | /// ``` |
1659 | #[must_use ] |
1660 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1661 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1662 | #[inline ] |
1663 | pub const fn is_ascii_graphic(&self) -> bool { |
1664 | matches!(*self, '!' ..='~' ) |
1665 | } |
1666 | |
1667 | /// Checks if the value is an ASCII whitespace character: |
1668 | /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, |
1669 | /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. |
1670 | /// |
1671 | /// Rust uses the WhatWG Infra Standard's [definition of ASCII |
1672 | /// whitespace][infra-aw]. There are several other definitions in |
1673 | /// wide use. For instance, [the POSIX locale][pct] includes |
1674 | /// U+000B VERTICAL TAB as well as all the above characters, |
1675 | /// but—from the very same specification—[the default rule for |
1676 | /// "field splitting" in the Bourne shell][bfs] considers *only* |
1677 | /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. |
1678 | /// |
1679 | /// If you are writing a program that will process an existing |
1680 | /// file format, check what that format's definition of whitespace is |
1681 | /// before using this function. |
1682 | /// |
1683 | /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace |
1684 | /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 |
1685 | /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 |
1686 | /// |
1687 | /// # Examples |
1688 | /// |
1689 | /// ``` |
1690 | /// let uppercase_a = 'A' ; |
1691 | /// let uppercase_g = 'G' ; |
1692 | /// let a = 'a' ; |
1693 | /// let g = 'g' ; |
1694 | /// let zero = '0' ; |
1695 | /// let percent = '%' ; |
1696 | /// let space = ' ' ; |
1697 | /// let lf = ' \n' ; |
1698 | /// let esc = ' \x1b' ; |
1699 | /// |
1700 | /// assert!(!uppercase_a.is_ascii_whitespace()); |
1701 | /// assert!(!uppercase_g.is_ascii_whitespace()); |
1702 | /// assert!(!a.is_ascii_whitespace()); |
1703 | /// assert!(!g.is_ascii_whitespace()); |
1704 | /// assert!(!zero.is_ascii_whitespace()); |
1705 | /// assert!(!percent.is_ascii_whitespace()); |
1706 | /// assert!(space.is_ascii_whitespace()); |
1707 | /// assert!(lf.is_ascii_whitespace()); |
1708 | /// assert!(!esc.is_ascii_whitespace()); |
1709 | /// ``` |
1710 | #[must_use ] |
1711 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1712 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1713 | #[inline ] |
1714 | pub const fn is_ascii_whitespace(&self) -> bool { |
1715 | matches!(*self, ' \t' | ' \n' | ' \x0C' | ' \r' | ' ' ) |
1716 | } |
1717 | |
1718 | /// Checks if the value is an ASCII control character: |
1719 | /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE. |
1720 | /// Note that most ASCII whitespace characters are control |
1721 | /// characters, but SPACE is not. |
1722 | /// |
1723 | /// # Examples |
1724 | /// |
1725 | /// ``` |
1726 | /// let uppercase_a = 'A' ; |
1727 | /// let uppercase_g = 'G' ; |
1728 | /// let a = 'a' ; |
1729 | /// let g = 'g' ; |
1730 | /// let zero = '0' ; |
1731 | /// let percent = '%' ; |
1732 | /// let space = ' ' ; |
1733 | /// let lf = ' \n' ; |
1734 | /// let esc = ' \x1b' ; |
1735 | /// |
1736 | /// assert!(!uppercase_a.is_ascii_control()); |
1737 | /// assert!(!uppercase_g.is_ascii_control()); |
1738 | /// assert!(!a.is_ascii_control()); |
1739 | /// assert!(!g.is_ascii_control()); |
1740 | /// assert!(!zero.is_ascii_control()); |
1741 | /// assert!(!percent.is_ascii_control()); |
1742 | /// assert!(!space.is_ascii_control()); |
1743 | /// assert!(lf.is_ascii_control()); |
1744 | /// assert!(esc.is_ascii_control()); |
1745 | /// ``` |
1746 | #[must_use ] |
1747 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1748 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1749 | #[inline ] |
1750 | pub const fn is_ascii_control(&self) -> bool { |
1751 | matches!(*self, ' \0' ..=' \x1F' | ' \x7F' ) |
1752 | } |
1753 | } |
1754 | |
/// Set of switches selecting which optional escapes are performed:
/// extended grapheme code points, single quotes and double quotes.
pub(crate) struct EscapeDebugExtArgs {
    /// When `true`, Extended Grapheme code points are escaped.
    pub(crate) escape_grapheme_extended: bool,

    /// When `true`, single quotes are escaped.
    pub(crate) escape_single_quote: bool,

    /// When `true`, double quotes are escaped.
    pub(crate) escape_double_quote: bool,
}

impl EscapeDebugExtArgs {
    /// Preset with every optional escape switched on.
    pub(crate) const ESCAPE_ALL: Self = EscapeDebugExtArgs {
        escape_grapheme_extended: true,
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
1773 | |
1774 | #[inline ] |
1775 | #[must_use ] |
1776 | const fn len_utf8(code: u32) -> usize { |
1777 | match code { |
1778 | ..MAX_ONE_B => 1, |
1779 | ..MAX_TWO_B => 2, |
1780 | ..MAX_THREE_B => 3, |
1781 | _ => 4, |
1782 | } |
1783 | } |
1784 | |
/// Returns the number of `u16` units reported for the UTF-16 encoding of `code`:
/// one when the value fits in 16 bits, two for anything larger.
#[inline]
#[must_use]
const fn len_utf16(code: u32) -> usize {
    match code {
        0..=0xFFFF => 1,
        _ => 2,
    }
}
1790 | |
1791 | /// Encodes a raw `u32` value as UTF-8 into the provided byte buffer, |
1792 | /// and then returns the subslice of the buffer that contains the encoded character. |
1793 | /// |
1794 | /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range. |
1795 | /// (Creating a `char` in the surrogate range is UB.) |
1796 | /// The result is valid [generalized UTF-8] but not valid UTF-8. |
1797 | /// |
1798 | /// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8 |
1799 | /// |
1800 | /// # Panics |
1801 | /// |
1802 | /// Panics if the buffer is not large enough. |
1803 | /// A buffer of length four is large enough to encode any `char`. |
1804 | #[unstable (feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" )] |
1805 | #[doc (hidden)] |
1806 | #[inline ] |
1807 | pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] { |
1808 | let len = len_utf8(code); |
1809 | match (len, &mut *dst) { |
1810 | (1, [a, ..]) => { |
1811 | *a = code as u8; |
1812 | } |
1813 | (2, [a, b, ..]) => { |
1814 | *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
1815 | *b = (code & 0x3F) as u8 | TAG_CONT; |
1816 | } |
1817 | (3, [a, b, c, ..]) => { |
1818 | *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
1819 | *b = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
1820 | *c = (code & 0x3F) as u8 | TAG_CONT; |
1821 | } |
1822 | (4, [a, b, c, d, ..]) => { |
1823 | *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
1824 | *b = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
1825 | *c = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
1826 | *d = (code & 0x3F) as u8 | TAG_CONT; |
1827 | } |
1828 | _ => { |
1829 | const_panic!( |
1830 | "encode_utf8: buffer does not have enough bytes to encode code point" , |
1831 | "encode_utf8: need {len} bytes to encode U+ {code:04X} but buffer has just {dst_len}" , |
1832 | code: u32 = code, |
1833 | len: usize = len, |
1834 | dst_len: usize = dst.len(), |
1835 | ) |
1836 | } |
1837 | }; |
1838 | // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds. |
1839 | unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) } |
1840 | } |
1841 | |
1842 | /// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer, |
1843 | /// and then returns the subslice of the buffer that contains the encoded character. |
1844 | /// |
1845 | /// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range. |
1846 | /// (Creating a `char` in the surrogate range is UB.) |
1847 | /// |
1848 | /// # Panics |
1849 | /// |
1850 | /// Panics if the buffer is not large enough. |
1851 | /// A buffer of length 2 is large enough to encode any `char`. |
1852 | #[unstable (feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" )] |
1853 | #[doc (hidden)] |
1854 | #[inline ] |
1855 | pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] { |
1856 | let len: usize = len_utf16(code); |
1857 | match (len, &mut *dst) { |
1858 | (1, [a: &mut u16, ..]) => { |
1859 | *a = code as u16; |
1860 | } |
1861 | (2, [a: &mut u16, b: &mut u16, ..]) => { |
1862 | code -= 0x1_0000; |
1863 | *a = (code >> 10) as u16 | 0xD800; |
1864 | *b = (code & 0x3FF) as u16 | 0xDC00; |
1865 | } |
1866 | _ => { |
1867 | const_panic!( |
1868 | "encode_utf16: buffer does not have enough bytes to encode code point" , |
1869 | "encode_utf16: need {len} bytes to encode U+ {code:04X} but buffer has just {dst_len}" , |
1870 | code: u32 = code, |
1871 | len: usize = len, |
1872 | dst_len: usize = dst.len(), |
1873 | ) |
1874 | } |
1875 | }; |
1876 | // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds. |
1877 | unsafe { slice::from_raw_parts_mut(data:dst.as_mut_ptr(), len) } |
1878 | } |
1879 | |