1 | //! impl char {} |
2 | |
3 | use crate::slice; |
4 | use crate::str::from_utf8_unchecked_mut; |
5 | use crate::unicode::printable::is_printable; |
6 | use crate::unicode::{self, conversions}; |
7 | |
8 | use super::*; |
9 | |
10 | impl char { |
11 | /// The lowest valid code point a `char` can have, `'\0'`. |
12 | /// |
13 | /// Unlike integer types, `char` actually has a gap in the middle, |
14 | /// meaning that the range of possible `char`s is smaller than you |
15 | /// might expect. Ranges of `char` will automatically hop this gap |
16 | /// for you: |
17 | /// |
18 | /// ``` |
19 | /// #![feature(char_min)] |
20 | /// let dist = u32::from(char::MAX) - u32::from(char::MIN); |
21 | /// let size = (char::MIN..=char::MAX).count() as u32; |
22 | /// assert!(size < dist); |
23 | /// ``` |
24 | /// |
25 | /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for |
26 | /// all `char` values. |
27 | /// |
28 | /// [`MAX`]: char::MAX |
29 | /// |
30 | /// # Examples |
31 | /// |
32 | /// ``` |
33 | /// #![feature(char_min)] |
34 | /// # fn something_which_returns_char() -> char { 'a' } |
35 | /// let c: char = something_which_returns_char(); |
36 | /// assert!(char::MIN <= c); |
37 | /// |
38 | /// let value_at_min = u32::from(char::MIN); |
/// assert_eq!(char::from_u32(value_at_min), Some('\0'));
40 | /// ``` |
41 | #[unstable (feature = "char_min" , issue = "114298" )] |
// U+0000 (NUL). The literal must contain exactly the `\0` escape; a stray
// space inside the quotes makes it a two-character (invalid) char literal.
pub const MIN: char = '\0';
43 | |
44 | /// The highest valid code point a `char` can have, `'\u{10FFFF}'`. |
45 | /// |
46 | /// Unlike integer types, `char` actually has a gap in the middle, |
47 | /// meaning that the range of possible `char`s is smaller than you |
48 | /// might expect. Ranges of `char` will automatically hop this gap |
49 | /// for you: |
50 | /// |
51 | /// ``` |
52 | /// #![feature(char_min)] |
53 | /// let dist = u32::from(char::MAX) - u32::from(char::MIN); |
54 | /// let size = (char::MIN..=char::MAX).count() as u32; |
55 | /// assert!(size < dist); |
56 | /// ``` |
57 | /// |
58 | /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for |
59 | /// all `char` values. |
60 | /// |
61 | /// [`MIN`]: char::MIN |
62 | /// |
63 | /// # Examples |
64 | /// |
65 | /// ``` |
66 | /// # fn something_which_returns_char() -> char { 'a' } |
67 | /// let c: char = something_which_returns_char(); |
68 | /// assert!(c <= char::MAX); |
69 | /// |
70 | /// let value_at_max = u32::from(char::MAX); |
/// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
72 | /// assert_eq!(char::from_u32(value_at_max + 1), None); |
73 | /// ``` |
74 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
// U+10FFFF. The literal must contain only the `\u{...}` escape; a stray
// space inside the quotes makes it an invalid multi-character literal.
pub const MAX: char = '\u{10ffff}';
76 | |
77 | /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a |
78 | /// decoding error. |
79 | /// |
80 | /// It can occur, for example, when giving ill-formed UTF-8 bytes to |
81 | /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy). |
82 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
// U+FFFD. The literal must contain only the `\u{...}` escape; a stray space
// inside the quotes makes it an invalid multi-character literal.
pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
84 | |
85 | /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of |
86 | /// `char` and `str` methods are based on. |
87 | /// |
88 | /// New versions of Unicode are released regularly and subsequently all methods |
89 | /// in the standard library depending on Unicode are updated. Therefore the |
90 | /// behavior of some `char` and `str` methods and the value of this constant |
91 | /// changes over time. This is *not* considered to be a breaking change. |
92 | /// |
93 | /// The version numbering scheme is explained in |
94 | /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4). |
95 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
96 | pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION; |
97 | |
98 | /// Creates an iterator over the UTF-16 encoded code points in `iter`, |
99 | /// returning unpaired surrogates as `Err`s. |
100 | /// |
101 | /// # Examples |
102 | /// |
103 | /// Basic usage: |
104 | /// |
105 | /// ``` |
106 | /// // 𝄞mus<invalid>ic<invalid> |
107 | /// let v = [ |
108 | /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, |
109 | /// ]; |
110 | /// |
111 | /// assert_eq!( |
112 | /// char::decode_utf16(v) |
113 | /// .map(|r| r.map_err(|e| e.unpaired_surrogate())) |
114 | /// .collect::<Vec<_>>(), |
115 | /// vec![ |
116 | /// Ok('𝄞' ), |
117 | /// Ok('m' ), Ok('u' ), Ok('s' ), |
118 | /// Err(0xDD1E), |
119 | /// Ok('i' ), Ok('c' ), |
120 | /// Err(0xD834) |
121 | /// ] |
122 | /// ); |
123 | /// ``` |
124 | /// |
125 | /// A lossy decoder can be obtained by replacing `Err` results with the replacement character: |
126 | /// |
127 | /// ``` |
128 | /// // 𝄞mus<invalid>ic<invalid> |
129 | /// let v = [ |
130 | /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, |
131 | /// ]; |
132 | /// |
133 | /// assert_eq!( |
134 | /// char::decode_utf16(v) |
135 | /// .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER)) |
136 | /// .collect::<String>(), |
137 | /// "𝄞mus�ic�" |
138 | /// ); |
139 | /// ``` |
140 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
141 | #[inline ] |
142 | pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> { |
143 | super::decode::decode_utf16(iter) |
144 | } |
145 | |
146 | /// Converts a `u32` to a `char`. |
147 | /// |
148 | /// Note that all `char`s are valid [`u32`]s, and can be cast to one with |
149 | /// [`as`](../std/keyword.as.html): |
150 | /// |
151 | /// ``` |
152 | /// let c = '💯' ; |
153 | /// let i = c as u32; |
154 | /// |
155 | /// assert_eq!(128175, i); |
156 | /// ``` |
157 | /// |
158 | /// However, the reverse is not true: not all valid [`u32`]s are valid |
159 | /// `char`s. `from_u32()` will return `None` if the input is not a valid value |
160 | /// for a `char`. |
161 | /// |
162 | /// For an unsafe version of this function which ignores these checks, see |
163 | /// [`from_u32_unchecked`]. |
164 | /// |
165 | /// [`from_u32_unchecked`]: #method.from_u32_unchecked |
166 | /// |
167 | /// # Examples |
168 | /// |
169 | /// Basic usage: |
170 | /// |
171 | /// ``` |
172 | /// let c = char::from_u32(0x2764); |
173 | /// |
174 | /// assert_eq!(Some('❤' ), c); |
175 | /// ``` |
176 | /// |
177 | /// Returning `None` when the input is not a valid `char`: |
178 | /// |
179 | /// ``` |
180 | /// let c = char::from_u32(0x110000); |
181 | /// |
182 | /// assert_eq!(None, c); |
183 | /// ``` |
184 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
185 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
186 | #[must_use ] |
187 | #[inline ] |
188 | pub const fn from_u32(i: u32) -> Option<char> { |
189 | super::convert::from_u32(i) |
190 | } |
191 | |
192 | /// Converts a `u32` to a `char`, ignoring validity. |
193 | /// |
194 | /// Note that all `char`s are valid [`u32`]s, and can be cast to one with |
195 | /// `as`: |
196 | /// |
197 | /// ``` |
198 | /// let c = '💯' ; |
199 | /// let i = c as u32; |
200 | /// |
201 | /// assert_eq!(128175, i); |
202 | /// ``` |
203 | /// |
204 | /// However, the reverse is not true: not all valid [`u32`]s are valid |
205 | /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to |
206 | /// `char`, possibly creating an invalid one. |
207 | /// |
208 | /// # Safety |
209 | /// |
210 | /// This function is unsafe, as it may construct invalid `char` values. |
211 | /// |
212 | /// For a safe version of this function, see the [`from_u32`] function. |
213 | /// |
214 | /// [`from_u32`]: #method.from_u32 |
215 | /// |
216 | /// # Examples |
217 | /// |
218 | /// Basic usage: |
219 | /// |
220 | /// ``` |
221 | /// let c = unsafe { char::from_u32_unchecked(0x2764) }; |
222 | /// |
223 | /// assert_eq!('❤' , c); |
224 | /// ``` |
225 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
226 | #[rustc_const_unstable (feature = "const_char_from_u32_unchecked" , issue = "89259" )] |
227 | #[must_use ] |
228 | #[inline ] |
229 | pub const unsafe fn from_u32_unchecked(i: u32) -> char { |
230 | // SAFETY: the safety contract must be upheld by the caller. |
231 | unsafe { super::convert::from_u32_unchecked(i) } |
232 | } |
233 | |
234 | /// Converts a digit in the given radix to a `char`. |
235 | /// |
236 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
237 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
238 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
239 | /// radices are supported. |
240 | /// |
241 | /// `from_digit()` will return `None` if the input is not a digit in |
242 | /// the given radix. |
243 | /// |
244 | /// # Panics |
245 | /// |
246 | /// Panics if given a radix larger than 36. |
247 | /// |
248 | /// # Examples |
249 | /// |
250 | /// Basic usage: |
251 | /// |
252 | /// ``` |
253 | /// let c = char::from_digit(4, 10); |
254 | /// |
255 | /// assert_eq!(Some('4' ), c); |
256 | /// |
257 | /// // Decimal 11 is a single digit in base 16 |
258 | /// let c = char::from_digit(11, 16); |
259 | /// |
260 | /// assert_eq!(Some('b' ), c); |
261 | /// ``` |
262 | /// |
263 | /// Returning `None` when the input is not a digit: |
264 | /// |
265 | /// ``` |
266 | /// let c = char::from_digit(20, 10); |
267 | /// |
268 | /// assert_eq!(None, c); |
269 | /// ``` |
270 | /// |
271 | /// Passing a large radix, causing a panic: |
272 | /// |
273 | /// ```should_panic |
274 | /// // this panics |
275 | /// let _c = char::from_digit(1, 37); |
276 | /// ``` |
277 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
278 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
279 | #[must_use ] |
280 | #[inline ] |
281 | pub const fn from_digit(num: u32, radix: u32) -> Option<char> { |
282 | super::convert::from_digit(num, radix) |
283 | } |
284 | |
285 | /// Checks if a `char` is a digit in the given radix. |
286 | /// |
287 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
288 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
289 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
290 | /// radices are supported. |
291 | /// |
292 | /// Compared to [`is_numeric()`], this function only recognizes the characters |
293 | /// `0-9`, `a-z` and `A-Z`. |
294 | /// |
295 | /// 'Digit' is defined to be only the following characters: |
296 | /// |
297 | /// * `0-9` |
298 | /// * `a-z` |
299 | /// * `A-Z` |
300 | /// |
301 | /// For a more comprehensive understanding of 'digit', see [`is_numeric()`]. |
302 | /// |
303 | /// [`is_numeric()`]: #method.is_numeric |
304 | /// |
305 | /// # Panics |
306 | /// |
307 | /// Panics if given a radix larger than 36. |
308 | /// |
309 | /// # Examples |
310 | /// |
311 | /// Basic usage: |
312 | /// |
313 | /// ``` |
314 | /// assert!('1' .is_digit(10)); |
315 | /// assert!('f' .is_digit(16)); |
316 | /// assert!(!'f' .is_digit(10)); |
317 | /// ``` |
318 | /// |
319 | /// Passing a large radix, causing a panic: |
320 | /// |
321 | /// ```should_panic |
322 | /// // this panics |
323 | /// '1' .is_digit(37); |
324 | /// ``` |
325 | #[stable (feature = "rust1" , since = "1.0.0" )] |
326 | #[inline ] |
327 | pub fn is_digit(self, radix: u32) -> bool { |
328 | self.to_digit(radix).is_some() |
329 | } |
330 | |
331 | /// Converts a `char` to a digit in the given radix. |
332 | /// |
333 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
334 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
335 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
336 | /// radices are supported. |
337 | /// |
338 | /// 'Digit' is defined to be only the following characters: |
339 | /// |
340 | /// * `0-9` |
341 | /// * `a-z` |
342 | /// * `A-Z` |
343 | /// |
344 | /// # Errors |
345 | /// |
346 | /// Returns `None` if the `char` does not refer to a digit in the given radix. |
347 | /// |
348 | /// # Panics |
349 | /// |
350 | /// Panics if given a radix larger than 36. |
351 | /// |
352 | /// # Examples |
353 | /// |
354 | /// Basic usage: |
355 | /// |
356 | /// ``` |
357 | /// assert_eq!('1' .to_digit(10), Some(1)); |
358 | /// assert_eq!('f' .to_digit(16), Some(15)); |
359 | /// ``` |
360 | /// |
361 | /// Passing a non-digit results in failure: |
362 | /// |
363 | /// ``` |
364 | /// assert_eq!('f' .to_digit(10), None); |
365 | /// assert_eq!('z' .to_digit(16), None); |
366 | /// ``` |
367 | /// |
368 | /// Passing a large radix, causing a panic: |
369 | /// |
370 | /// ```should_panic |
371 | /// // this panics |
372 | /// let _ = '1' .to_digit(37); |
373 | /// ``` |
374 | #[stable (feature = "rust1" , since = "1.0.0" )] |
375 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
376 | #[must_use = "this returns the result of the operation, \ |
377 | without modifying the original" ] |
378 | #[inline ] |
379 | pub const fn to_digit(self, radix: u32) -> Option<u32> { |
380 | // If not a digit, a number greater than radix will be created. |
381 | let mut digit = (self as u32).wrapping_sub('0' as u32); |
382 | if radix > 10 { |
383 | assert!(radix <= 36, "to_digit: radix is too high (maximum 36)" ); |
384 | if digit < 10 { |
385 | return Some(digit); |
386 | } |
387 | // Force the 6th bit to be set to ensure ascii is lower case. |
388 | digit = (self as u32 | 0b10_0000).wrapping_sub('a' as u32).saturating_add(10); |
389 | } |
390 | // FIXME: once then_some is const fn, use it here |
391 | if digit < radix { Some(digit) } else { None } |
392 | } |
393 | |
394 | /// Returns an iterator that yields the hexadecimal Unicode escape of a |
395 | /// character as `char`s. |
396 | /// |
397 | /// This will escape characters with the Rust syntax of the form |
398 | /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation. |
399 | /// |
400 | /// # Examples |
401 | /// |
402 | /// As an iterator: |
403 | /// |
404 | /// ``` |
405 | /// for c in '❤' .escape_unicode() { |
406 | /// print!("{c}" ); |
407 | /// } |
408 | /// println!(); |
409 | /// ``` |
410 | /// |
411 | /// Using `println!` directly: |
412 | /// |
413 | /// ``` |
414 | /// println!("{}" , '❤' .escape_unicode()); |
415 | /// ``` |
416 | /// |
417 | /// Both are equivalent to: |
418 | /// |
419 | /// ``` |
/// println!("\\u{{2764}}");
421 | /// ``` |
422 | /// |
423 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
424 | /// |
425 | /// ``` |
/// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
427 | /// ``` |
428 | #[must_use = "this returns the escaped char as an iterator, \ |
429 | without modifying the original" ] |
430 | #[stable (feature = "rust1" , since = "1.0.0" )] |
431 | #[inline ] |
432 | pub fn escape_unicode(self) -> EscapeUnicode { |
433 | EscapeUnicode::new(self) |
434 | } |
435 | |
436 | /// An extended version of `escape_debug` that optionally permits escaping |
437 | /// Extended Grapheme codepoints, single quotes, and double quotes. This |
438 | /// allows us to format characters like nonspacing marks better when they're |
439 | /// at the start of a string, and allows escaping single quotes in |
440 | /// characters, and double quotes in strings. |
441 | #[inline ] |
442 | pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug { |
443 | match self { |
444 | ' \0' => EscapeDebug::backslash(ascii::Char::Digit0), |
445 | ' \t' => EscapeDebug::backslash(ascii::Char::SmallT), |
446 | ' \r' => EscapeDebug::backslash(ascii::Char::SmallR), |
447 | ' \n' => EscapeDebug::backslash(ascii::Char::SmallN), |
448 | ' \\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus), |
449 | ' \"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark), |
450 | ' \'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe), |
451 | _ if args.escape_grapheme_extended && self.is_grapheme_extended() => { |
452 | EscapeDebug::from_unicode(self.escape_unicode()) |
453 | } |
454 | _ if is_printable(self) => EscapeDebug::printable(self), |
455 | _ => EscapeDebug::from_unicode(self.escape_unicode()), |
456 | } |
457 | } |
458 | |
459 | /// Returns an iterator that yields the literal escape code of a character |
460 | /// as `char`s. |
461 | /// |
462 | /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations |
463 | /// of `str` or `char`. |
464 | /// |
465 | /// # Examples |
466 | /// |
467 | /// As an iterator: |
468 | /// |
469 | /// ``` |
/// for c in '\n'.escape_debug() {
471 | /// print!("{c}" ); |
472 | /// } |
473 | /// println!(); |
474 | /// ``` |
475 | /// |
476 | /// Using `println!` directly: |
477 | /// |
478 | /// ``` |
/// println!("{}", '\n'.escape_debug());
480 | /// ``` |
481 | /// |
482 | /// Both are equivalent to: |
483 | /// |
484 | /// ``` |
/// println!("\\n");
486 | /// ``` |
487 | /// |
488 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
489 | /// |
490 | /// ``` |
/// assert_eq!('\n'.escape_debug().to_string(), "\\n");
492 | /// ``` |
493 | #[must_use = "this returns the escaped char as an iterator, \ |
494 | without modifying the original" ] |
495 | #[stable (feature = "char_escape_debug" , since = "1.20.0" )] |
496 | #[inline ] |
497 | pub fn escape_debug(self) -> EscapeDebug { |
498 | self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL) |
499 | } |
500 | |
501 | /// Returns an iterator that yields the literal escape code of a character |
502 | /// as `char`s. |
503 | /// |
504 | /// The default is chosen with a bias toward producing literals that are |
505 | /// legal in a variety of languages, including C++11 and similar C-family |
506 | /// languages. The exact rules are: |
507 | /// |
508 | /// * Tab is escaped as `\t`. |
509 | /// * Carriage return is escaped as `\r`. |
510 | /// * Line feed is escaped as `\n`. |
511 | /// * Single quote is escaped as `\'`. |
512 | /// * Double quote is escaped as `\"`. |
513 | /// * Backslash is escaped as `\\`. |
514 | /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e` |
515 | /// inclusive is not escaped. |
516 | /// * All other characters are given hexadecimal Unicode escapes; see |
517 | /// [`escape_unicode`]. |
518 | /// |
519 | /// [`escape_unicode`]: #method.escape_unicode |
520 | /// |
521 | /// # Examples |
522 | /// |
523 | /// As an iterator: |
524 | /// |
525 | /// ``` |
526 | /// for c in '"' .escape_default() { |
527 | /// print!("{c}" ); |
528 | /// } |
529 | /// println!(); |
530 | /// ``` |
531 | /// |
532 | /// Using `println!` directly: |
533 | /// |
534 | /// ``` |
535 | /// println!("{}" , '"' .escape_default()); |
536 | /// ``` |
537 | /// |
538 | /// Both are equivalent to: |
539 | /// |
540 | /// ``` |
/// println!("\\\"");
542 | /// ``` |
543 | /// |
544 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
545 | /// |
546 | /// ``` |
/// assert_eq!('"'.escape_default().to_string(), "\\\"");
548 | /// ``` |
549 | #[must_use = "this returns the escaped char as an iterator, \ |
550 | without modifying the original" ] |
551 | #[stable (feature = "rust1" , since = "1.0.0" )] |
552 | #[inline ] |
553 | pub fn escape_default(self) -> EscapeDefault { |
554 | match self { |
555 | ' \t' => EscapeDefault::backslash(ascii::Char::SmallT), |
556 | ' \r' => EscapeDefault::backslash(ascii::Char::SmallR), |
557 | ' \n' => EscapeDefault::backslash(ascii::Char::SmallN), |
558 | ' \\' | ' \'' | '"' => EscapeDefault::backslash(self.as_ascii().unwrap()), |
559 | ' \x20' ..=' \x7e' => EscapeDefault::printable(self.as_ascii().unwrap()), |
560 | _ => EscapeDefault::from_unicode(self.escape_unicode()), |
561 | } |
562 | } |
563 | |
564 | /// Returns the number of bytes this `char` would need if encoded in UTF-8. |
565 | /// |
566 | /// That number of bytes is always between 1 and 4, inclusive. |
567 | /// |
568 | /// # Examples |
569 | /// |
570 | /// Basic usage: |
571 | /// |
572 | /// ``` |
573 | /// let len = 'A' .len_utf8(); |
574 | /// assert_eq!(len, 1); |
575 | /// |
576 | /// let len = 'ß' .len_utf8(); |
577 | /// assert_eq!(len, 2); |
578 | /// |
579 | /// let len = 'ℝ' .len_utf8(); |
580 | /// assert_eq!(len, 3); |
581 | /// |
582 | /// let len = '💣' .len_utf8(); |
583 | /// assert_eq!(len, 4); |
584 | /// ``` |
585 | /// |
586 | /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it |
587 | /// would take if each code point was represented as a `char` vs in the `&str` itself: |
588 | /// |
589 | /// ``` |
590 | /// // as chars |
591 | /// let eastern = '東' ; |
592 | /// let capital = '京' ; |
593 | /// |
594 | /// // both can be represented as three bytes |
595 | /// assert_eq!(3, eastern.len_utf8()); |
596 | /// assert_eq!(3, capital.len_utf8()); |
597 | /// |
598 | /// // as a &str, these two are encoded in UTF-8 |
599 | /// let tokyo = "東京" ; |
600 | /// |
601 | /// let len = eastern.len_utf8() + capital.len_utf8(); |
602 | /// |
603 | /// // we can see that they take six bytes total... |
604 | /// assert_eq!(6, tokyo.len()); |
605 | /// |
606 | /// // ... just like the &str |
607 | /// assert_eq!(len, tokyo.len()); |
608 | /// ``` |
609 | #[stable (feature = "rust1" , since = "1.0.0" )] |
610 | #[rustc_const_stable (feature = "const_char_len_utf" , since = "1.52.0" )] |
611 | #[inline ] |
612 | pub const fn len_utf8(self) -> usize { |
613 | len_utf8(self as u32) |
614 | } |
615 | |
616 | /// Returns the number of 16-bit code units this `char` would need if |
617 | /// encoded in UTF-16. |
618 | /// |
619 | /// That number of code units is always either 1 or 2, for unicode scalar values in |
620 | /// the [basic multilingual plane] or [supplementary planes] respectively. |
621 | /// |
622 | /// See the documentation for [`len_utf8()`] for more explanation of this |
623 | /// concept. This function is a mirror, but for UTF-16 instead of UTF-8. |
624 | /// |
625 | /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane |
626 | /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes |
627 | /// [`len_utf8()`]: #method.len_utf8 |
628 | /// |
629 | /// # Examples |
630 | /// |
631 | /// Basic usage: |
632 | /// |
633 | /// ``` |
634 | /// let n = 'ß' .len_utf16(); |
635 | /// assert_eq!(n, 1); |
636 | /// |
637 | /// let len = '💣' .len_utf16(); |
638 | /// assert_eq!(len, 2); |
639 | /// ``` |
640 | #[stable (feature = "rust1" , since = "1.0.0" )] |
641 | #[rustc_const_stable (feature = "const_char_len_utf" , since = "1.52.0" )] |
642 | #[inline ] |
643 | pub const fn len_utf16(self) -> usize { |
644 | let ch = self as u32; |
645 | if (ch & 0xFFFF) == ch { 1 } else { 2 } |
646 | } |
647 | |
648 | /// Encodes this character as UTF-8 into the provided byte buffer, |
649 | /// and then returns the subslice of the buffer that contains the encoded character. |
650 | /// |
651 | /// # Panics |
652 | /// |
653 | /// Panics if the buffer is not large enough. |
654 | /// A buffer of length four is large enough to encode any `char`. |
655 | /// |
656 | /// # Examples |
657 | /// |
658 | /// In both of these examples, 'ß' takes two bytes to encode. |
659 | /// |
660 | /// ``` |
661 | /// let mut b = [0; 2]; |
662 | /// |
663 | /// let result = 'ß' .encode_utf8(&mut b); |
664 | /// |
665 | /// assert_eq!(result, "ß" ); |
666 | /// |
667 | /// assert_eq!(result.len(), 2); |
668 | /// ``` |
669 | /// |
670 | /// A buffer that's too small: |
671 | /// |
672 | /// ```should_panic |
673 | /// let mut b = [0; 1]; |
674 | /// |
675 | /// // this panics |
676 | /// 'ß' .encode_utf8(&mut b); |
677 | /// ``` |
678 | #[stable (feature = "unicode_encode_char" , since = "1.15.0" )] |
679 | #[inline ] |
680 | pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str { |
681 | // SAFETY: `char` is not a surrogate, so this is valid UTF-8. |
682 | unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) } |
683 | } |
684 | |
685 | /// Encodes this character as UTF-16 into the provided `u16` buffer, |
686 | /// and then returns the subslice of the buffer that contains the encoded character. |
687 | /// |
688 | /// # Panics |
689 | /// |
690 | /// Panics if the buffer is not large enough. |
691 | /// A buffer of length 2 is large enough to encode any `char`. |
692 | /// |
693 | /// # Examples |
694 | /// |
695 | /// In both of these examples, '𝕊' takes two `u16`s to encode. |
696 | /// |
697 | /// ``` |
698 | /// let mut b = [0; 2]; |
699 | /// |
700 | /// let result = '𝕊' .encode_utf16(&mut b); |
701 | /// |
702 | /// assert_eq!(result.len(), 2); |
703 | /// ``` |
704 | /// |
705 | /// A buffer that's too small: |
706 | /// |
707 | /// ```should_panic |
708 | /// let mut b = [0; 1]; |
709 | /// |
710 | /// // this panics |
711 | /// '𝕊' .encode_utf16(&mut b); |
712 | /// ``` |
713 | #[stable (feature = "unicode_encode_char" , since = "1.15.0" )] |
714 | #[inline ] |
715 | pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { |
716 | encode_utf16_raw(self as u32, dst) |
717 | } |
718 | |
719 | /// Returns `true` if this `char` has the `Alphabetic` property. |
720 | /// |
721 | /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
722 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
723 | /// |
724 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
725 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
726 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
727 | /// |
728 | /// # Examples |
729 | /// |
730 | /// Basic usage: |
731 | /// |
732 | /// ``` |
733 | /// assert!('a' .is_alphabetic()); |
734 | /// assert!('京' .is_alphabetic()); |
735 | /// |
736 | /// let c = '💝' ; |
737 | /// // love is many things, but it is not alphabetic |
738 | /// assert!(!c.is_alphabetic()); |
739 | /// ``` |
740 | #[must_use ] |
741 | #[stable (feature = "rust1" , since = "1.0.0" )] |
742 | #[inline ] |
743 | pub fn is_alphabetic(self) -> bool { |
744 | match self { |
745 | 'a' ..='z' | 'A' ..='Z' => true, |
746 | c => c > ' \x7f' && unicode::Alphabetic(c), |
747 | } |
748 | } |
749 | |
750 | /// Returns `true` if this `char` has the `Lowercase` property. |
751 | /// |
752 | /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
753 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
754 | /// |
755 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
756 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
757 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
758 | /// |
759 | /// # Examples |
760 | /// |
761 | /// Basic usage: |
762 | /// |
763 | /// ``` |
764 | /// assert!('a' .is_lowercase()); |
765 | /// assert!('δ' .is_lowercase()); |
766 | /// assert!(!'A' .is_lowercase()); |
767 | /// assert!(!'Δ' .is_lowercase()); |
768 | /// |
769 | /// // The various Chinese scripts and punctuation do not have case, and so: |
770 | /// assert!(!'中' .is_lowercase()); |
771 | /// assert!(!' ' .is_lowercase()); |
772 | /// ``` |
773 | /// |
774 | /// In a const context: |
775 | /// |
776 | /// ``` |
777 | /// #![feature(const_unicode_case_lookup)] |
778 | /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ' .is_lowercase(); |
779 | /// assert!(!CAPITAL_DELTA_IS_LOWERCASE); |
780 | /// ``` |
781 | #[must_use ] |
782 | #[stable (feature = "rust1" , since = "1.0.0" )] |
783 | #[rustc_const_unstable (feature = "const_unicode_case_lookup" , issue = "101400" )] |
784 | #[inline ] |
785 | pub const fn is_lowercase(self) -> bool { |
786 | match self { |
787 | 'a' ..='z' => true, |
788 | c => c > ' \x7f' && unicode::Lowercase(c), |
789 | } |
790 | } |
791 | |
792 | /// Returns `true` if this `char` has the `Uppercase` property. |
793 | /// |
794 | /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
795 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
796 | /// |
797 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
798 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
799 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
800 | /// |
801 | /// # Examples |
802 | /// |
803 | /// Basic usage: |
804 | /// |
805 | /// ``` |
806 | /// assert!(!'a' .is_uppercase()); |
807 | /// assert!(!'δ' .is_uppercase()); |
808 | /// assert!('A' .is_uppercase()); |
809 | /// assert!('Δ' .is_uppercase()); |
810 | /// |
811 | /// // The various Chinese scripts and punctuation do not have case, and so: |
812 | /// assert!(!'中' .is_uppercase()); |
813 | /// assert!(!' ' .is_uppercase()); |
814 | /// ``` |
815 | /// |
816 | /// In a const context: |
817 | /// |
818 | /// ``` |
819 | /// #![feature(const_unicode_case_lookup)] |
820 | /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ' .is_uppercase(); |
821 | /// assert!(CAPITAL_DELTA_IS_UPPERCASE); |
822 | /// ``` |
823 | #[must_use ] |
824 | #[stable (feature = "rust1" , since = "1.0.0" )] |
825 | #[rustc_const_unstable (feature = "const_unicode_case_lookup" , issue = "101400" )] |
826 | #[inline ] |
827 | pub const fn is_uppercase(self) -> bool { |
828 | match self { |
829 | 'A' ..='Z' => true, |
830 | c => c > ' \x7f' && unicode::Uppercase(c), |
831 | } |
832 | } |
833 | |
834 | /// Returns `true` if this `char` has the `White_Space` property. |
835 | /// |
836 | /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`]. |
837 | /// |
838 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
839 | /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
840 | /// |
841 | /// # Examples |
842 | /// |
843 | /// Basic usage: |
844 | /// |
845 | /// ``` |
846 | /// assert!(' ' .is_whitespace()); |
847 | /// |
848 | /// // line break |
/// assert!('\n'.is_whitespace());
850 | /// |
851 | /// // a non-breaking space |
/// assert!('\u{A0}'.is_whitespace());
853 | /// |
854 | /// assert!(!'越' .is_whitespace()); |
855 | /// ``` |
856 | #[must_use ] |
857 | #[stable (feature = "rust1" , since = "1.0.0" )] |
858 | #[inline ] |
859 | pub fn is_whitespace(self) -> bool { |
860 | match self { |
861 | ' ' | ' \x09' ..=' \x0d' => true, |
862 | c => c > ' \x7f' && unicode::White_Space(c), |
863 | } |
864 | } |
865 | |
866 | /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`]. |
867 | /// |
868 | /// [`is_alphabetic()`]: #method.is_alphabetic |
869 | /// [`is_numeric()`]: #method.is_numeric |
870 | /// |
871 | /// # Examples |
872 | /// |
873 | /// Basic usage: |
874 | /// |
875 | /// ``` |
876 | /// assert!('٣' .is_alphanumeric()); |
877 | /// assert!('7' .is_alphanumeric()); |
878 | /// assert!('৬' .is_alphanumeric()); |
879 | /// assert!('¾' .is_alphanumeric()); |
880 | /// assert!('①' .is_alphanumeric()); |
881 | /// assert!('K' .is_alphanumeric()); |
882 | /// assert!('و' .is_alphanumeric()); |
883 | /// assert!('藏' .is_alphanumeric()); |
884 | /// ``` |
885 | #[must_use ] |
886 | #[stable (feature = "rust1" , since = "1.0.0" )] |
887 | #[inline ] |
888 | pub fn is_alphanumeric(self) -> bool { |
889 | self.is_alphabetic() || self.is_numeric() |
890 | } |
891 | |
/// Returns `true` if this `char` has the general category for control codes.
///
/// Control codes (code points with the general category of `Cc`) are described in Chapter 4
/// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
/// Database][ucd] [`UnicodeData.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// // U+009C, STRING TERMINATOR (written as an escape because the character is invisible)
/// assert!('\u{009c}'.is_control());
/// assert!(!'q'.is_control());
/// ```
#[must_use ]
#[stable (feature = "rust1" , since = "1.0.0" )]
#[inline ]
pub fn is_control(self) -> bool {
    // Delegates directly to the crate's `Cc` lookup; no ASCII shortcut is taken first.
    unicode::Cc(self)
}
917 | |
/// Returns `true` if this `char` has the `Grapheme_Extend` property.
///
/// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
/// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
/// [`DerivedCoreProperties.txt`].
///
/// This helper is crate-private (`pub(crate)`) and is therefore not part of the public
/// `char` API.
///
/// [uax29]: https://www.unicode.org/reports/tr29/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
#[must_use ]
#[inline ]
pub(crate) fn is_grapheme_extended(self) -> bool {
    // Delegates directly to the crate's `Grapheme_Extend` lookup.
    unicode::Grapheme_Extend(self)
}
932 | |
933 | /// Returns `true` if this `char` has one of the general categories for numbers. |
934 | /// |
935 | /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric |
936 | /// characters, and `No` for other numeric characters) are specified in the [Unicode Character |
937 | /// Database][ucd] [`UnicodeData.txt`]. |
938 | /// |
939 | /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'. |
940 | /// If you want everything including characters with overlapping purposes then you might want to use |
941 | /// a unicode or language-processing library that exposes the appropriate character properties instead |
942 | /// of looking at the unicode categories. |
943 | /// |
944 | /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use |
945 | /// `is_ascii_digit` or `is_digit` instead. |
946 | /// |
947 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
948 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
949 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
950 | /// |
951 | /// # Examples |
952 | /// |
953 | /// Basic usage: |
954 | /// |
955 | /// ``` |
956 | /// assert!('٣' .is_numeric()); |
957 | /// assert!('7' .is_numeric()); |
958 | /// assert!('৬' .is_numeric()); |
959 | /// assert!('¾' .is_numeric()); |
960 | /// assert!('①' .is_numeric()); |
961 | /// assert!(!'K' .is_numeric()); |
962 | /// assert!(!'و' .is_numeric()); |
963 | /// assert!(!'藏' .is_numeric()); |
964 | /// assert!(!'三' .is_numeric()); |
965 | /// ``` |
966 | #[must_use ] |
967 | #[stable (feature = "rust1" , since = "1.0.0" )] |
968 | #[inline ] |
969 | pub fn is_numeric(self) -> bool { |
970 | match self { |
971 | '0' ..='9' => true, |
972 | c => c > ' \x7f' && unicode::N(c), |
973 | } |
974 | } |
975 | |
976 | /// Returns an iterator that yields the lowercase mapping of this `char` as one or more |
977 | /// `char`s. |
978 | /// |
979 | /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`. |
980 | /// |
981 | /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character |
982 | /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. |
983 | /// |
984 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
985 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
986 | /// |
987 | /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields |
988 | /// the `char`(s) given by [`SpecialCasing.txt`]. |
989 | /// |
990 | /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt |
991 | /// |
992 | /// This operation performs an unconditional mapping without tailoring. That is, the conversion |
993 | /// is independent of context and language. |
994 | /// |
995 | /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in |
996 | /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion. |
997 | /// |
998 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
999 | /// |
1000 | /// # Examples |
1001 | /// |
1002 | /// As an iterator: |
1003 | /// |
1004 | /// ``` |
1005 | /// for c in 'İ' .to_lowercase() { |
1006 | /// print!("{c}" ); |
1007 | /// } |
1008 | /// println!(); |
1009 | /// ``` |
1010 | /// |
1011 | /// Using `println!` directly: |
1012 | /// |
1013 | /// ``` |
1014 | /// println!("{}" , 'İ' .to_lowercase()); |
1015 | /// ``` |
1016 | /// |
1017 | /// Both are equivalent to: |
1018 | /// |
1019 | /// ``` |
1020 | /// println!("i \u{307}" ); |
1021 | /// ``` |
1022 | /// |
1023 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
1024 | /// |
1025 | /// ``` |
1026 | /// assert_eq!('C' .to_lowercase().to_string(), "c" ); |
1027 | /// |
1028 | /// // Sometimes the result is more than one character: |
1029 | /// assert_eq!('İ' .to_lowercase().to_string(), "i \u{307}" ); |
1030 | /// |
1031 | /// // Characters that do not have both uppercase and lowercase |
1032 | /// // convert into themselves. |
1033 | /// assert_eq!('山' .to_lowercase().to_string(), "山" ); |
1034 | /// ``` |
1035 | #[must_use = "this returns the lowercase character as a new iterator, \ |
1036 | without modifying the original" ] |
1037 | #[stable (feature = "rust1" , since = "1.0.0" )] |
1038 | #[inline ] |
1039 | pub fn to_lowercase(self) -> ToLowercase { |
1040 | ToLowercase(CaseMappingIter::new(conversions::to_lower(self))) |
1041 | } |
1042 | |
1043 | /// Returns an iterator that yields the uppercase mapping of this `char` as one or more |
1044 | /// `char`s. |
1045 | /// |
1046 | /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`. |
1047 | /// |
1048 | /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character |
1049 | /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. |
1050 | /// |
1051 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
1052 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
1053 | /// |
1054 | /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields |
1055 | /// the `char`(s) given by [`SpecialCasing.txt`]. |
1056 | /// |
1057 | /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt |
1058 | /// |
1059 | /// This operation performs an unconditional mapping without tailoring. That is, the conversion |
1060 | /// is independent of context and language. |
1061 | /// |
1062 | /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in |
1063 | /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion. |
1064 | /// |
1065 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
1066 | /// |
1067 | /// # Examples |
1068 | /// |
1069 | /// As an iterator: |
1070 | /// |
1071 | /// ``` |
1072 | /// for c in 'ß' .to_uppercase() { |
1073 | /// print!("{c}" ); |
1074 | /// } |
1075 | /// println!(); |
1076 | /// ``` |
1077 | /// |
1078 | /// Using `println!` directly: |
1079 | /// |
1080 | /// ``` |
1081 | /// println!("{}" , 'ß' .to_uppercase()); |
1082 | /// ``` |
1083 | /// |
1084 | /// Both are equivalent to: |
1085 | /// |
1086 | /// ``` |
1087 | /// println!("SS" ); |
1088 | /// ``` |
1089 | /// |
1090 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
1091 | /// |
1092 | /// ``` |
1093 | /// assert_eq!('c' .to_uppercase().to_string(), "C" ); |
1094 | /// |
1095 | /// // Sometimes the result is more than one character: |
1096 | /// assert_eq!('ß' .to_uppercase().to_string(), "SS" ); |
1097 | /// |
1098 | /// // Characters that do not have both uppercase and lowercase |
1099 | /// // convert into themselves. |
1100 | /// assert_eq!('山' .to_uppercase().to_string(), "山" ); |
1101 | /// ``` |
1102 | /// |
1103 | /// # Note on locale |
1104 | /// |
1105 | /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two: |
1106 | /// |
1107 | /// * 'Dotless': I / ı, sometimes written ï |
1108 | /// * 'Dotted': İ / i |
1109 | /// |
1110 | /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore: |
1111 | /// |
1112 | /// ``` |
1113 | /// let upper_i = 'i' .to_uppercase().to_string(); |
1114 | /// ``` |
1115 | /// |
1116 | /// The value of `upper_i` here relies on the language of the text: if we're |
1117 | /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should |
1118 | /// be `"İ"`. `to_uppercase()` does not take this into account, and so: |
1119 | /// |
1120 | /// ``` |
1121 | /// let upper_i = 'i' .to_uppercase().to_string(); |
1122 | /// |
1123 | /// assert_eq!(upper_i, "I" ); |
1124 | /// ``` |
1125 | /// |
1126 | /// holds across languages. |
1127 | #[must_use = "this returns the uppercase character as a new iterator, \ |
1128 | without modifying the original" ] |
1129 | #[stable (feature = "rust1" , since = "1.0.0" )] |
1130 | #[inline ] |
1131 | pub fn to_uppercase(self) -> ToUppercase { |
1132 | ToUppercase(CaseMappingIter::new(conversions::to_upper(self))) |
1133 | } |
1134 | |
1135 | /// Checks if the value is within the ASCII range. |
1136 | /// |
1137 | /// # Examples |
1138 | /// |
1139 | /// ``` |
1140 | /// let ascii = 'a' ; |
1141 | /// let non_ascii = '❤' ; |
1142 | /// |
1143 | /// assert!(ascii.is_ascii()); |
1144 | /// assert!(!non_ascii.is_ascii()); |
1145 | /// ``` |
1146 | #[must_use ] |
1147 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1148 | #[rustc_const_stable (feature = "const_char_is_ascii" , since = "1.32.0" )] |
1149 | #[inline ] |
1150 | pub const fn is_ascii(&self) -> bool { |
1151 | *self as u32 <= 0x7F |
1152 | } |
1153 | |
1154 | /// Returns `Some` if the value is within the ASCII range, |
1155 | /// or `None` if it's not. |
1156 | /// |
1157 | /// This is preferred to [`Self::is_ascii`] when you're passing the value |
1158 | /// along to something else that can take [`ascii::Char`] rather than |
1159 | /// needing to check again for itself whether the value is in ASCII. |
1160 | #[must_use ] |
1161 | #[unstable (feature = "ascii_char" , issue = "110998" )] |
1162 | #[inline ] |
1163 | pub const fn as_ascii(&self) -> Option<ascii::Char> { |
1164 | if self.is_ascii() { |
1165 | // SAFETY: Just checked that this is ASCII. |
1166 | Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) }) |
1167 | } else { |
1168 | None |
1169 | } |
1170 | } |
1171 | |
1172 | /// Makes a copy of the value in its ASCII upper case equivalent. |
1173 | /// |
1174 | /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', |
1175 | /// but non-ASCII letters are unchanged. |
1176 | /// |
1177 | /// To uppercase the value in-place, use [`make_ascii_uppercase()`]. |
1178 | /// |
1179 | /// To uppercase ASCII characters in addition to non-ASCII characters, use |
1180 | /// [`to_uppercase()`]. |
1181 | /// |
1182 | /// # Examples |
1183 | /// |
1184 | /// ``` |
1185 | /// let ascii = 'a' ; |
1186 | /// let non_ascii = '❤' ; |
1187 | /// |
1188 | /// assert_eq!('A' , ascii.to_ascii_uppercase()); |
1189 | /// assert_eq!('❤' , non_ascii.to_ascii_uppercase()); |
1190 | /// ``` |
1191 | /// |
1192 | /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase |
1193 | /// [`to_uppercase()`]: #method.to_uppercase |
1194 | #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`" ] |
1195 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1196 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
1197 | #[inline ] |
1198 | pub const fn to_ascii_uppercase(&self) -> char { |
1199 | if self.is_ascii_lowercase() { |
1200 | (*self as u8).ascii_change_case_unchecked() as char |
1201 | } else { |
1202 | *self |
1203 | } |
1204 | } |
1205 | |
1206 | /// Makes a copy of the value in its ASCII lower case equivalent. |
1207 | /// |
1208 | /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', |
1209 | /// but non-ASCII letters are unchanged. |
1210 | /// |
1211 | /// To lowercase the value in-place, use [`make_ascii_lowercase()`]. |
1212 | /// |
1213 | /// To lowercase ASCII characters in addition to non-ASCII characters, use |
1214 | /// [`to_lowercase()`]. |
1215 | /// |
1216 | /// # Examples |
1217 | /// |
1218 | /// ``` |
1219 | /// let ascii = 'A' ; |
1220 | /// let non_ascii = '❤' ; |
1221 | /// |
1222 | /// assert_eq!('a' , ascii.to_ascii_lowercase()); |
1223 | /// assert_eq!('❤' , non_ascii.to_ascii_lowercase()); |
1224 | /// ``` |
1225 | /// |
1226 | /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase |
1227 | /// [`to_lowercase()`]: #method.to_lowercase |
1228 | #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`" ] |
1229 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1230 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
1231 | #[inline ] |
1232 | pub const fn to_ascii_lowercase(&self) -> char { |
1233 | if self.is_ascii_uppercase() { |
1234 | (*self as u8).ascii_change_case_unchecked() as char |
1235 | } else { |
1236 | *self |
1237 | } |
1238 | } |
1239 | |
1240 | /// Checks that two values are an ASCII case-insensitive match. |
1241 | /// |
1242 | /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>. |
1243 | /// |
1244 | /// # Examples |
1245 | /// |
1246 | /// ``` |
1247 | /// let upper_a = 'A' ; |
1248 | /// let lower_a = 'a' ; |
1249 | /// let lower_z = 'z' ; |
1250 | /// |
1251 | /// assert!(upper_a.eq_ignore_ascii_case(&lower_a)); |
1252 | /// assert!(upper_a.eq_ignore_ascii_case(&upper_a)); |
1253 | /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z)); |
1254 | /// ``` |
1255 | /// |
1256 | /// [to_ascii_lowercase]: #method.to_ascii_lowercase |
1257 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1258 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
1259 | #[inline ] |
1260 | pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool { |
1261 | self.to_ascii_lowercase() == other.to_ascii_lowercase() |
1262 | } |
1263 | |
1264 | /// Converts this type to its ASCII upper case equivalent in-place. |
1265 | /// |
1266 | /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', |
1267 | /// but non-ASCII letters are unchanged. |
1268 | /// |
1269 | /// To return a new uppercased value without modifying the existing one, use |
1270 | /// [`to_ascii_uppercase()`]. |
1271 | /// |
1272 | /// # Examples |
1273 | /// |
1274 | /// ``` |
1275 | /// let mut ascii = 'a' ; |
1276 | /// |
1277 | /// ascii.make_ascii_uppercase(); |
1278 | /// |
1279 | /// assert_eq!('A' , ascii); |
1280 | /// ``` |
1281 | /// |
1282 | /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase |
1283 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1284 | #[inline ] |
1285 | pub fn make_ascii_uppercase(&mut self) { |
1286 | *self = self.to_ascii_uppercase(); |
1287 | } |
1288 | |
1289 | /// Converts this type to its ASCII lower case equivalent in-place. |
1290 | /// |
1291 | /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', |
1292 | /// but non-ASCII letters are unchanged. |
1293 | /// |
1294 | /// To return a new lowercased value without modifying the existing one, use |
1295 | /// [`to_ascii_lowercase()`]. |
1296 | /// |
1297 | /// # Examples |
1298 | /// |
1299 | /// ``` |
1300 | /// let mut ascii = 'A' ; |
1301 | /// |
1302 | /// ascii.make_ascii_lowercase(); |
1303 | /// |
1304 | /// assert_eq!('a' , ascii); |
1305 | /// ``` |
1306 | /// |
1307 | /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase |
1308 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
1309 | #[inline ] |
1310 | pub fn make_ascii_lowercase(&mut self) { |
1311 | *self = self.to_ascii_lowercase(); |
1312 | } |
1313 | |
1314 | /// Checks if the value is an ASCII alphabetic character: |
1315 | /// |
1316 | /// - U+0041 'A' ..= U+005A 'Z', or |
1317 | /// - U+0061 'a' ..= U+007A 'z'. |
1318 | /// |
1319 | /// # Examples |
1320 | /// |
1321 | /// ``` |
1322 | /// let uppercase_a = 'A' ; |
1323 | /// let uppercase_g = 'G' ; |
1324 | /// let a = 'a' ; |
1325 | /// let g = 'g' ; |
1326 | /// let zero = '0' ; |
1327 | /// let percent = '%' ; |
1328 | /// let space = ' ' ; |
1329 | /// let lf = ' \n' ; |
1330 | /// let esc = ' \x1b' ; |
1331 | /// |
1332 | /// assert!(uppercase_a.is_ascii_alphabetic()); |
1333 | /// assert!(uppercase_g.is_ascii_alphabetic()); |
1334 | /// assert!(a.is_ascii_alphabetic()); |
1335 | /// assert!(g.is_ascii_alphabetic()); |
1336 | /// assert!(!zero.is_ascii_alphabetic()); |
1337 | /// assert!(!percent.is_ascii_alphabetic()); |
1338 | /// assert!(!space.is_ascii_alphabetic()); |
1339 | /// assert!(!lf.is_ascii_alphabetic()); |
1340 | /// assert!(!esc.is_ascii_alphabetic()); |
1341 | /// ``` |
1342 | #[must_use ] |
1343 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1344 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1345 | #[inline ] |
1346 | pub const fn is_ascii_alphabetic(&self) -> bool { |
1347 | matches!(*self, 'A' ..='Z' | 'a' ..='z' ) |
1348 | } |
1349 | |
/// Checks if the value is an ASCII uppercase character:
/// U+0041 'A' ..= U+005A 'Z'.
///
/// # Examples
///
/// ```
/// assert!('A'.is_ascii_uppercase());
/// assert!('G'.is_ascii_uppercase());
/// assert!(!'a'.is_ascii_uppercase());
/// assert!(!'g'.is_ascii_uppercase());
/// assert!(!'0'.is_ascii_uppercase());
/// assert!(!'%'.is_ascii_uppercase());
/// assert!(!' '.is_ascii_uppercase());
/// assert!(!'\n'.is_ascii_uppercase());
/// assert!(!'\x1b'.is_ascii_uppercase());
/// ```
#[must_use ]
#[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )]
#[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )]
#[inline ]
pub const fn is_ascii_uppercase(&self) -> bool {
    // Single inclusive range test; both 'A' and 'Z' are accepted.
    matches!(*self, 'A' ..='Z' )
}
1383 | |
/// Checks if the value is an ASCII lowercase character:
/// U+0061 'a' ..= U+007A 'z'.
///
/// # Examples
///
/// ```
/// assert!(!'A'.is_ascii_lowercase());
/// assert!(!'G'.is_ascii_lowercase());
/// assert!('a'.is_ascii_lowercase());
/// assert!('g'.is_ascii_lowercase());
/// assert!(!'0'.is_ascii_lowercase());
/// assert!(!'%'.is_ascii_lowercase());
/// assert!(!' '.is_ascii_lowercase());
/// assert!(!'\n'.is_ascii_lowercase());
/// assert!(!'\x1b'.is_ascii_lowercase());
/// ```
#[must_use ]
#[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )]
#[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )]
#[inline ]
pub const fn is_ascii_lowercase(&self) -> bool {
    // Single inclusive range test; both 'a' and 'z' are accepted.
    matches!(*self, 'a' ..='z' )
}
1417 | |
1418 | /// Checks if the value is an ASCII alphanumeric character: |
1419 | /// |
1420 | /// - U+0041 'A' ..= U+005A 'Z', or |
1421 | /// - U+0061 'a' ..= U+007A 'z', or |
1422 | /// - U+0030 '0' ..= U+0039 '9'. |
1423 | /// |
1424 | /// # Examples |
1425 | /// |
1426 | /// ``` |
1427 | /// let uppercase_a = 'A' ; |
1428 | /// let uppercase_g = 'G' ; |
1429 | /// let a = 'a' ; |
1430 | /// let g = 'g' ; |
1431 | /// let zero = '0' ; |
1432 | /// let percent = '%' ; |
1433 | /// let space = ' ' ; |
1434 | /// let lf = ' \n' ; |
1435 | /// let esc = ' \x1b' ; |
1436 | /// |
1437 | /// assert!(uppercase_a.is_ascii_alphanumeric()); |
1438 | /// assert!(uppercase_g.is_ascii_alphanumeric()); |
1439 | /// assert!(a.is_ascii_alphanumeric()); |
1440 | /// assert!(g.is_ascii_alphanumeric()); |
1441 | /// assert!(zero.is_ascii_alphanumeric()); |
1442 | /// assert!(!percent.is_ascii_alphanumeric()); |
1443 | /// assert!(!space.is_ascii_alphanumeric()); |
1444 | /// assert!(!lf.is_ascii_alphanumeric()); |
1445 | /// assert!(!esc.is_ascii_alphanumeric()); |
1446 | /// ``` |
1447 | #[must_use ] |
1448 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1449 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1450 | #[inline ] |
1451 | pub const fn is_ascii_alphanumeric(&self) -> bool { |
1452 | matches!(*self, '0' ..='9' ) | matches!(*self, 'A' ..='Z' ) | matches!(*self, 'a' ..='z' ) |
1453 | } |
1454 | |
/// Checks if the value is an ASCII decimal digit:
/// U+0030 '0' ..= U+0039 '9'.
///
/// # Examples
///
/// ```
/// assert!(!'A'.is_ascii_digit());
/// assert!(!'G'.is_ascii_digit());
/// assert!(!'a'.is_ascii_digit());
/// assert!(!'g'.is_ascii_digit());
/// assert!('0'.is_ascii_digit());
/// assert!(!'%'.is_ascii_digit());
/// assert!(!' '.is_ascii_digit());
/// assert!(!'\n'.is_ascii_digit());
/// assert!(!'\x1b'.is_ascii_digit());
/// ```
#[must_use ]
#[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )]
#[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )]
#[inline ]
pub const fn is_ascii_digit(&self) -> bool {
    // Single inclusive range test; both '0' and '9' are accepted.
    matches!(*self, '0' ..='9' )
}
1488 | |
/// Checks if the value is an ASCII octal digit:
/// U+0030 '0' ..= U+0037 '7'.
///
/// # Examples
///
/// ```
/// #![feature(is_ascii_octdigit)]
///
/// assert!(!'A'.is_ascii_octdigit());
/// assert!(!'a'.is_ascii_octdigit());
/// assert!('0'.is_ascii_octdigit());
/// assert!('7'.is_ascii_octdigit());
/// // '8' and '9' are decimal digits but not octal digits.
/// assert!(!'9'.is_ascii_octdigit());
/// assert!(!'%'.is_ascii_octdigit());
/// assert!(!'\n'.is_ascii_octdigit());
/// ```
#[must_use ]
#[unstable (feature = "is_ascii_octdigit" , issue = "101288" )]
#[rustc_const_unstable (feature = "is_ascii_octdigit" , issue = "101288" )]
#[inline ]
pub const fn is_ascii_octdigit(&self) -> bool {
    // Single inclusive range test; both '0' and '7' are accepted.
    matches!(*self, '0' ..='7' )
}
1520 | |
1521 | /// Checks if the value is an ASCII hexadecimal digit: |
1522 | /// |
1523 | /// - U+0030 '0' ..= U+0039 '9', or |
1524 | /// - U+0041 'A' ..= U+0046 'F', or |
1525 | /// - U+0061 'a' ..= U+0066 'f'. |
1526 | /// |
1527 | /// # Examples |
1528 | /// |
1529 | /// ``` |
1530 | /// let uppercase_a = 'A' ; |
1531 | /// let uppercase_g = 'G' ; |
1532 | /// let a = 'a' ; |
1533 | /// let g = 'g' ; |
1534 | /// let zero = '0' ; |
1535 | /// let percent = '%' ; |
1536 | /// let space = ' ' ; |
1537 | /// let lf = ' \n' ; |
1538 | /// let esc = ' \x1b' ; |
1539 | /// |
1540 | /// assert!(uppercase_a.is_ascii_hexdigit()); |
1541 | /// assert!(!uppercase_g.is_ascii_hexdigit()); |
1542 | /// assert!(a.is_ascii_hexdigit()); |
1543 | /// assert!(!g.is_ascii_hexdigit()); |
1544 | /// assert!(zero.is_ascii_hexdigit()); |
1545 | /// assert!(!percent.is_ascii_hexdigit()); |
1546 | /// assert!(!space.is_ascii_hexdigit()); |
1547 | /// assert!(!lf.is_ascii_hexdigit()); |
1548 | /// assert!(!esc.is_ascii_hexdigit()); |
1549 | /// ``` |
1550 | #[must_use ] |
1551 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1552 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1553 | #[inline ] |
1554 | pub const fn is_ascii_hexdigit(&self) -> bool { |
1555 | matches!(*self, '0' ..='9' ) | matches!(*self, 'A' ..='F' ) | matches!(*self, 'a' ..='f' ) |
1556 | } |
1557 | |
1558 | /// Checks if the value is an ASCII punctuation character: |
1559 | /// |
1560 | /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or |
1561 | /// - U+003A ..= U+0040 `: ; < = > ? @`, or |
1562 | /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or |
1563 | /// - U+007B ..= U+007E `{ | } ~` |
1564 | /// |
1565 | /// # Examples |
1566 | /// |
1567 | /// ``` |
1568 | /// let uppercase_a = 'A' ; |
1569 | /// let uppercase_g = 'G' ; |
1570 | /// let a = 'a' ; |
1571 | /// let g = 'g' ; |
1572 | /// let zero = '0' ; |
1573 | /// let percent = '%' ; |
1574 | /// let space = ' ' ; |
1575 | /// let lf = ' \n' ; |
1576 | /// let esc = ' \x1b' ; |
1577 | /// |
1578 | /// assert!(!uppercase_a.is_ascii_punctuation()); |
1579 | /// assert!(!uppercase_g.is_ascii_punctuation()); |
1580 | /// assert!(!a.is_ascii_punctuation()); |
1581 | /// assert!(!g.is_ascii_punctuation()); |
1582 | /// assert!(!zero.is_ascii_punctuation()); |
1583 | /// assert!(percent.is_ascii_punctuation()); |
1584 | /// assert!(!space.is_ascii_punctuation()); |
1585 | /// assert!(!lf.is_ascii_punctuation()); |
1586 | /// assert!(!esc.is_ascii_punctuation()); |
1587 | /// ``` |
1588 | #[must_use ] |
1589 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1590 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1591 | #[inline ] |
1592 | pub const fn is_ascii_punctuation(&self) -> bool { |
1593 | matches!(*self, '!' ..='/' ) |
1594 | | matches!(*self, ':' ..='@' ) |
1595 | | matches!(*self, '[' ..='`' ) |
1596 | | matches!(*self, '{' ..='~' ) |
1597 | } |
1598 | |
/// Checks if the value is an ASCII graphic character:
/// U+0021 '!' ..= U+007E '~'.
///
/// Note that U+0020 SPACE lies just below this range and is therefore not graphic.
///
/// # Examples
///
/// ```
/// assert!('A'.is_ascii_graphic());
/// assert!('G'.is_ascii_graphic());
/// assert!('a'.is_ascii_graphic());
/// assert!('g'.is_ascii_graphic());
/// assert!('0'.is_ascii_graphic());
/// assert!('%'.is_ascii_graphic());
/// assert!(!' '.is_ascii_graphic());
/// assert!(!'\n'.is_ascii_graphic());
/// assert!(!'\x1b'.is_ascii_graphic());
/// ```
#[must_use ]
#[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )]
#[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )]
#[inline ]
pub const fn is_ascii_graphic(&self) -> bool {
    // Single inclusive range test; both '!' and '~' are accepted.
    matches!(*self, '!' ..='~' )
}
1632 | |
1633 | /// Checks if the value is an ASCII whitespace character: |
1634 | /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, |
1635 | /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. |
1636 | /// |
1637 | /// Rust uses the WhatWG Infra Standard's [definition of ASCII |
1638 | /// whitespace][infra-aw]. There are several other definitions in |
1639 | /// wide use. For instance, [the POSIX locale][pct] includes |
1640 | /// U+000B VERTICAL TAB as well as all the above characters, |
1641 | /// but—from the very same specification—[the default rule for |
1642 | /// "field splitting" in the Bourne shell][bfs] considers *only* |
1643 | /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. |
1644 | /// |
1645 | /// If you are writing a program that will process an existing |
1646 | /// file format, check what that format's definition of whitespace is |
1647 | /// before using this function. |
1648 | /// |
1649 | /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace |
1650 | /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 |
1651 | /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 |
1652 | /// |
1653 | /// # Examples |
1654 | /// |
1655 | /// ``` |
1656 | /// let uppercase_a = 'A' ; |
1657 | /// let uppercase_g = 'G' ; |
1658 | /// let a = 'a' ; |
1659 | /// let g = 'g' ; |
1660 | /// let zero = '0' ; |
1661 | /// let percent = '%' ; |
1662 | /// let space = ' ' ; |
1663 | /// let lf = ' \n' ; |
1664 | /// let esc = ' \x1b' ; |
1665 | /// |
1666 | /// assert!(!uppercase_a.is_ascii_whitespace()); |
1667 | /// assert!(!uppercase_g.is_ascii_whitespace()); |
1668 | /// assert!(!a.is_ascii_whitespace()); |
1669 | /// assert!(!g.is_ascii_whitespace()); |
1670 | /// assert!(!zero.is_ascii_whitespace()); |
1671 | /// assert!(!percent.is_ascii_whitespace()); |
1672 | /// assert!(space.is_ascii_whitespace()); |
1673 | /// assert!(lf.is_ascii_whitespace()); |
1674 | /// assert!(!esc.is_ascii_whitespace()); |
1675 | /// ``` |
1676 | #[must_use ] |
1677 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1678 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1679 | #[inline ] |
1680 | pub const fn is_ascii_whitespace(&self) -> bool { |
1681 | matches!(*self, ' \t' | ' \n' | ' \x0C' | ' \r' | ' ' ) |
1682 | } |
1683 | |
1684 | /// Checks if the value is an ASCII control character: |
1685 | /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE. |
1686 | /// Note that most ASCII whitespace characters are control |
1687 | /// characters, but SPACE is not. |
1688 | /// |
1689 | /// # Examples |
1690 | /// |
1691 | /// ``` |
1692 | /// let uppercase_a = 'A' ; |
1693 | /// let uppercase_g = 'G' ; |
1694 | /// let a = 'a' ; |
1695 | /// let g = 'g' ; |
1696 | /// let zero = '0' ; |
1697 | /// let percent = '%' ; |
1698 | /// let space = ' ' ; |
1699 | /// let lf = ' \n' ; |
1700 | /// let esc = ' \x1b' ; |
1701 | /// |
1702 | /// assert!(!uppercase_a.is_ascii_control()); |
1703 | /// assert!(!uppercase_g.is_ascii_control()); |
1704 | /// assert!(!a.is_ascii_control()); |
1705 | /// assert!(!g.is_ascii_control()); |
1706 | /// assert!(!zero.is_ascii_control()); |
1707 | /// assert!(!percent.is_ascii_control()); |
1708 | /// assert!(!space.is_ascii_control()); |
1709 | /// assert!(lf.is_ascii_control()); |
1710 | /// assert!(esc.is_ascii_control()); |
1711 | /// ``` |
1712 | #[must_use ] |
1713 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
1714 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
1715 | #[inline ] |
1716 | pub const fn is_ascii_control(&self) -> bool { |
1717 | matches!(*self, ' \0' ..=' \x1F' | ' \x7F' ) |
1718 | } |
1719 | } |
1720 | |
/// A bundle of boolean switches, each enabling one optional kind of escaping.
///
/// NOTE(review): going by the name, this is presumably the argument pack for
/// an `escape_debug_ext`-style routine; confirm against the call sites.
pub(crate) struct EscapeDebugExtArgs {
    /// When `true`, extended grapheme code points are escaped.
    pub(crate) escape_grapheme_extended: bool,

    /// When `true`, single quotes are escaped.
    pub(crate) escape_single_quote: bool,

    /// When `true`, double quotes are escaped.
    pub(crate) escape_double_quote: bool,
}

impl EscapeDebugExtArgs {
    /// Preset with every switch turned on.
    pub(crate) const ESCAPE_ALL: Self = EscapeDebugExtArgs {
        escape_double_quote: true,
        escape_single_quote: true,
        escape_grapheme_extended: true,
    };
}
1739 | |
1740 | #[inline ] |
1741 | const fn len_utf8(code: u32) -> usize { |
1742 | if code < MAX_ONE_B { |
1743 | 1 |
1744 | } else if code < MAX_TWO_B { |
1745 | 2 |
1746 | } else if code < MAX_THREE_B { |
1747 | 3 |
1748 | } else { |
1749 | 4 |
1750 | } |
1751 | } |
1752 | |
1753 | /// Encodes a raw u32 value as UTF-8 into the provided byte buffer, |
1754 | /// and then returns the subslice of the buffer that contains the encoded character. |
1755 | /// |
1756 | /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range. |
1757 | /// (Creating a `char` in the surrogate range is UB.) |
1758 | /// The result is valid [generalized UTF-8] but not valid UTF-8. |
1759 | /// |
1760 | /// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8 |
1761 | /// |
1762 | /// # Panics |
1763 | /// |
1764 | /// Panics if the buffer is not large enough. |
1765 | /// A buffer of length four is large enough to encode any `char`. |
1766 | #[unstable (feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" )] |
1767 | #[doc (hidden)] |
1768 | #[inline ] |
1769 | pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] { |
1770 | let len = len_utf8(code); |
1771 | match (len, &mut dst[..]) { |
1772 | (1, [a, ..]) => { |
1773 | *a = code as u8; |
1774 | } |
1775 | (2, [a, b, ..]) => { |
1776 | *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
1777 | *b = (code & 0x3F) as u8 | TAG_CONT; |
1778 | } |
1779 | (3, [a, b, c, ..]) => { |
1780 | *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
1781 | *b = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
1782 | *c = (code & 0x3F) as u8 | TAG_CONT; |
1783 | } |
1784 | (4, [a, b, c, d, ..]) => { |
1785 | *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
1786 | *b = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
1787 | *c = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
1788 | *d = (code & 0x3F) as u8 | TAG_CONT; |
1789 | } |
1790 | _ => panic!( |
1791 | "encode_utf8: need {} bytes to encode U+ {:X}, but the buffer has {}" , |
1792 | len, |
1793 | code, |
1794 | dst.len(), |
1795 | ), |
1796 | }; |
1797 | &mut dst[..len] |
1798 | } |
1799 | |
1800 | /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer, |
1801 | /// and then returns the subslice of the buffer that contains the encoded character. |
1802 | /// |
1803 | /// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range. |
1804 | /// (Creating a `char` in the surrogate range is UB.) |
1805 | /// |
1806 | /// # Panics |
1807 | /// |
1808 | /// Panics if the buffer is not large enough. |
1809 | /// A buffer of length 2 is large enough to encode any `char`. |
1810 | #[unstable (feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" )] |
1811 | #[doc (hidden)] |
1812 | #[inline ] |
pub fn encode_utf16_raw(code: u32, dst: &mut [u16]) -> &mut [u16] {
    // Values up to U+FFFF -- lone surrogates included -- occupy a single
    // unit; everything above is split into a surrogate pair.
    let len: usize = if code <= 0xFFFF { 1 } else { 2 };
    // Pairing `len` with a slice pattern checks the buffer size and binds the
    // output units in one step, so no `unsafe` indexing is needed. A buffer
    // that is too short falls through to the panic arm.
    match (len, &mut *dst) {
        (1, [unit, ..]) => {
            *unit = code as u16;
        }
        (2, [high, low, ..]) => {
            // Remove the 0x1_0000 bias, then put the upper 10 bits in the
            // high surrogate and the lower 10 bits in the low surrogate.
            let offset = code - 0x1_0000;
            *high = 0xD800 | ((offset >> 10) as u16);
            *low = 0xDC00 | ((offset as u16) & 0x3FF);
        }
        // `len` is reported directly rather than via
        // `char::from_u32_unchecked(code).len_utf16()`: `code` may be a
        // surrogate here, and building a `char` from a surrogate is undefined
        // behaviour.
        _ => panic!(
            "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
            len,
            code,
            dst.len(),
        ),
    }
    // Hand back only the prefix that was actually written.
    &mut dst[..len]
}
1836 | |