| 1 | //! impl char {} |
| 2 | |
| 3 | use super::*; |
| 4 | use crate::panic::const_panic; |
| 5 | use crate::slice; |
| 6 | use crate::str::from_utf8_unchecked_mut; |
| 7 | use crate::ub_checks::assert_unsafe_precondition; |
| 8 | use crate::unicode::printable::is_printable; |
| 9 | use crate::unicode::{self, conversions}; |
| 10 | |
| 11 | impl char { |
| 12 | /// The lowest valid code point a `char` can have, `'\0'`. |
| 13 | /// |
| 14 | /// Unlike integer types, `char` actually has a gap in the middle, |
| 15 | /// meaning that the range of possible `char`s is smaller than you |
| 16 | /// might expect. Ranges of `char` will automatically hop this gap |
| 17 | /// for you: |
| 18 | /// |
| 19 | /// ``` |
| 20 | /// let dist = u32::from(char::MAX) - u32::from(char::MIN); |
| 21 | /// let size = (char::MIN..=char::MAX).count() as u32; |
| 22 | /// assert!(size < dist); |
| 23 | /// ``` |
| 24 | /// |
| 25 | /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for |
| 26 | /// all `char` values. |
| 27 | /// |
| 28 | /// [`MAX`]: char::MAX |
| 29 | /// |
| 30 | /// # Examples |
| 31 | /// |
| 32 | /// ``` |
| 33 | /// # fn something_which_returns_char() -> char { 'a' } |
| 34 | /// let c: char = something_which_returns_char(); |
| 35 | /// assert!(char::MIN <= c); |
| 36 | /// |
| 37 | /// let value_at_min = u32::from(char::MIN); |
| 38 | /// assert_eq!(char::from_u32(value_at_min), Some(' \0' )); |
| 39 | /// ``` |
| 40 | #[stable (feature = "char_min" , since = "1.83.0" )] |
| 41 | pub const MIN: char = ' \0' ; |
| 42 | |
| 43 | /// The highest valid code point a `char` can have, `'\u{10FFFF}'`. |
| 44 | /// |
| 45 | /// Unlike integer types, `char` actually has a gap in the middle, |
| 46 | /// meaning that the range of possible `char`s is smaller than you |
| 47 | /// might expect. Ranges of `char` will automatically hop this gap |
| 48 | /// for you: |
| 49 | /// |
| 50 | /// ``` |
| 51 | /// let dist = u32::from(char::MAX) - u32::from(char::MIN); |
| 52 | /// let size = (char::MIN..=char::MAX).count() as u32; |
| 53 | /// assert!(size < dist); |
| 54 | /// ``` |
| 55 | /// |
| 56 | /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for |
| 57 | /// all `char` values. |
| 58 | /// |
| 59 | /// [`MIN`]: char::MIN |
| 60 | /// |
| 61 | /// # Examples |
| 62 | /// |
| 63 | /// ``` |
| 64 | /// # fn something_which_returns_char() -> char { 'a' } |
| 65 | /// let c: char = something_which_returns_char(); |
| 66 | /// assert!(c <= char::MAX); |
| 67 | /// |
| 68 | /// let value_at_max = u32::from(char::MAX); |
| 69 | /// assert_eq!(char::from_u32(value_at_max), Some(' \u{10FFFF}' )); |
| 70 | /// assert_eq!(char::from_u32(value_at_max + 1), None); |
| 71 | /// ``` |
| 72 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
| 73 | pub const MAX: char = ' \u{10FFFF}' ; |
| 74 | |
| 75 | /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to |
| 76 | /// UTF-8 encoding. |
| 77 | #[unstable (feature = "char_max_len" , issue = "121714" )] |
| 78 | pub const MAX_LEN_UTF8: usize = 4; |
| 79 | |
| 80 | /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char` |
| 81 | /// to UTF-16 encoding. |
| 82 | #[unstable (feature = "char_max_len" , issue = "121714" )] |
| 83 | pub const MAX_LEN_UTF16: usize = 2; |
| 84 | |
| 85 | /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a |
| 86 | /// decoding error. |
| 87 | /// |
| 88 | /// It can occur, for example, when giving ill-formed UTF-8 bytes to |
| 89 | /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy). |
| 90 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
| 91 | pub const REPLACEMENT_CHARACTER: char = ' \u{FFFD}' ; |
| 92 | |
| 93 | /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of |
| 94 | /// `char` and `str` methods are based on. |
| 95 | /// |
| 96 | /// New versions of Unicode are released regularly and subsequently all methods |
| 97 | /// in the standard library depending on Unicode are updated. Therefore the |
| 98 | /// behavior of some `char` and `str` methods and the value of this constant |
| 99 | /// changes over time. This is *not* considered to be a breaking change. |
| 100 | /// |
| 101 | /// The version numbering scheme is explained in |
| 102 | /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4). |
| 103 | #[stable (feature = "assoc_char_consts" , since = "1.52.0" )] |
| 104 | pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION; |
| 105 | |
| 106 | /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`, |
| 107 | /// returning unpaired surrogates as `Err`s. |
| 108 | /// |
| 109 | /// # Examples |
| 110 | /// |
| 111 | /// Basic usage: |
| 112 | /// |
| 113 | /// ``` |
| 114 | /// // 𝄞mus<invalid>ic<invalid> |
| 115 | /// let v = [ |
| 116 | /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, |
| 117 | /// ]; |
| 118 | /// |
| 119 | /// assert_eq!( |
| 120 | /// char::decode_utf16(v) |
| 121 | /// .map(|r| r.map_err(|e| e.unpaired_surrogate())) |
| 122 | /// .collect::<Vec<_>>(), |
| 123 | /// vec![ |
| 124 | /// Ok('𝄞' ), |
| 125 | /// Ok('m' ), Ok('u' ), Ok('s' ), |
| 126 | /// Err(0xDD1E), |
| 127 | /// Ok('i' ), Ok('c' ), |
| 128 | /// Err(0xD834) |
| 129 | /// ] |
| 130 | /// ); |
| 131 | /// ``` |
| 132 | /// |
| 133 | /// A lossy decoder can be obtained by replacing `Err` results with the replacement character: |
| 134 | /// |
| 135 | /// ``` |
| 136 | /// // 𝄞mus<invalid>ic<invalid> |
| 137 | /// let v = [ |
| 138 | /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, |
| 139 | /// ]; |
| 140 | /// |
| 141 | /// assert_eq!( |
| 142 | /// char::decode_utf16(v) |
| 143 | /// .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER)) |
| 144 | /// .collect::<String>(), |
| 145 | /// "𝄞mus�ic�" |
| 146 | /// ); |
| 147 | /// ``` |
| 148 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
| 149 | #[inline ] |
| 150 | pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> { |
| 151 | super::decode::decode_utf16(iter) |
| 152 | } |
| 153 | |
| 154 | /// Converts a `u32` to a `char`. |
| 155 | /// |
| 156 | /// Note that all `char`s are valid [`u32`]s, and can be cast to one with |
| 157 | /// [`as`](../std/keyword.as.html): |
| 158 | /// |
| 159 | /// ``` |
| 160 | /// let c = '💯' ; |
| 161 | /// let i = c as u32; |
| 162 | /// |
| 163 | /// assert_eq!(128175, i); |
| 164 | /// ``` |
| 165 | /// |
| 166 | /// However, the reverse is not true: not all valid [`u32`]s are valid |
| 167 | /// `char`s. `from_u32()` will return `None` if the input is not a valid value |
| 168 | /// for a `char`. |
| 169 | /// |
| 170 | /// For an unsafe version of this function which ignores these checks, see |
| 171 | /// [`from_u32_unchecked`]. |
| 172 | /// |
| 173 | /// [`from_u32_unchecked`]: #method.from_u32_unchecked |
| 174 | /// |
| 175 | /// # Examples |
| 176 | /// |
| 177 | /// Basic usage: |
| 178 | /// |
| 179 | /// ``` |
| 180 | /// let c = char::from_u32(0x2764); |
| 181 | /// |
| 182 | /// assert_eq!(Some('❤' ), c); |
| 183 | /// ``` |
| 184 | /// |
| 185 | /// Returning `None` when the input is not a valid `char`: |
| 186 | /// |
| 187 | /// ``` |
| 188 | /// let c = char::from_u32(0x110000); |
| 189 | /// |
| 190 | /// assert_eq!(None, c); |
| 191 | /// ``` |
| 192 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
| 193 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
| 194 | #[must_use ] |
| 195 | #[inline ] |
| 196 | pub const fn from_u32(i: u32) -> Option<char> { |
| 197 | super::convert::from_u32(i) |
| 198 | } |
| 199 | |
| 200 | /// Converts a `u32` to a `char`, ignoring validity. |
| 201 | /// |
| 202 | /// Note that all `char`s are valid [`u32`]s, and can be cast to one with |
| 203 | /// `as`: |
| 204 | /// |
| 205 | /// ``` |
| 206 | /// let c = '💯' ; |
| 207 | /// let i = c as u32; |
| 208 | /// |
| 209 | /// assert_eq!(128175, i); |
| 210 | /// ``` |
| 211 | /// |
| 212 | /// However, the reverse is not true: not all valid [`u32`]s are valid |
| 213 | /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to |
| 214 | /// `char`, possibly creating an invalid one. |
| 215 | /// |
| 216 | /// # Safety |
| 217 | /// |
| 218 | /// This function is unsafe, as it may construct invalid `char` values. |
| 219 | /// |
| 220 | /// For a safe version of this function, see the [`from_u32`] function. |
| 221 | /// |
| 222 | /// [`from_u32`]: #method.from_u32 |
| 223 | /// |
| 224 | /// # Examples |
| 225 | /// |
| 226 | /// Basic usage: |
| 227 | /// |
| 228 | /// ``` |
| 229 | /// let c = unsafe { char::from_u32_unchecked(0x2764) }; |
| 230 | /// |
| 231 | /// assert_eq!('❤' , c); |
| 232 | /// ``` |
| 233 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
| 234 | #[rustc_const_stable (feature = "const_char_from_u32_unchecked" , since = "1.81.0" )] |
| 235 | #[must_use ] |
| 236 | #[inline ] |
| 237 | pub const unsafe fn from_u32_unchecked(i: u32) -> char { |
| 238 | // SAFETY: the safety contract must be upheld by the caller. |
| 239 | unsafe { super::convert::from_u32_unchecked(i) } |
| 240 | } |
| 241 | |
| 242 | /// Converts a digit in the given radix to a `char`. |
| 243 | /// |
| 244 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
| 245 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
| 246 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
| 247 | /// radices are supported. |
| 248 | /// |
| 249 | /// `from_digit()` will return `None` if the input is not a digit in |
| 250 | /// the given radix. |
| 251 | /// |
| 252 | /// # Panics |
| 253 | /// |
| 254 | /// Panics if given a radix larger than 36. |
| 255 | /// |
| 256 | /// # Examples |
| 257 | /// |
| 258 | /// Basic usage: |
| 259 | /// |
| 260 | /// ``` |
| 261 | /// let c = char::from_digit(4, 10); |
| 262 | /// |
| 263 | /// assert_eq!(Some('4' ), c); |
| 264 | /// |
| 265 | /// // Decimal 11 is a single digit in base 16 |
| 266 | /// let c = char::from_digit(11, 16); |
| 267 | /// |
| 268 | /// assert_eq!(Some('b' ), c); |
| 269 | /// ``` |
| 270 | /// |
| 271 | /// Returning `None` when the input is not a digit: |
| 272 | /// |
| 273 | /// ``` |
| 274 | /// let c = char::from_digit(20, 10); |
| 275 | /// |
| 276 | /// assert_eq!(None, c); |
| 277 | /// ``` |
| 278 | /// |
| 279 | /// Passing a large radix, causing a panic: |
| 280 | /// |
| 281 | /// ```should_panic |
| 282 | /// // this panics |
| 283 | /// let _c = char::from_digit(1, 37); |
| 284 | /// ``` |
| 285 | #[stable (feature = "assoc_char_funcs" , since = "1.52.0" )] |
| 286 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
| 287 | #[must_use ] |
| 288 | #[inline ] |
| 289 | pub const fn from_digit(num: u32, radix: u32) -> Option<char> { |
| 290 | super::convert::from_digit(num, radix) |
| 291 | } |
| 292 | |
| 293 | /// Checks if a `char` is a digit in the given radix. |
| 294 | /// |
| 295 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
| 296 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
| 297 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
| 298 | /// radices are supported. |
| 299 | /// |
| 300 | /// Compared to [`is_numeric()`], this function only recognizes the characters |
| 301 | /// `0-9`, `a-z` and `A-Z`. |
| 302 | /// |
| 303 | /// 'Digit' is defined to be only the following characters: |
| 304 | /// |
| 305 | /// * `0-9` |
| 306 | /// * `a-z` |
| 307 | /// * `A-Z` |
| 308 | /// |
| 309 | /// For a more comprehensive understanding of 'digit', see [`is_numeric()`]. |
| 310 | /// |
| 311 | /// [`is_numeric()`]: #method.is_numeric |
| 312 | /// |
| 313 | /// # Panics |
| 314 | /// |
| 315 | /// Panics if given a radix smaller than 2 or larger than 36. |
| 316 | /// |
| 317 | /// # Examples |
| 318 | /// |
| 319 | /// Basic usage: |
| 320 | /// |
| 321 | /// ``` |
| 322 | /// assert!('1' .is_digit(10)); |
| 323 | /// assert!('f' .is_digit(16)); |
| 324 | /// assert!(!'f' .is_digit(10)); |
| 325 | /// ``` |
| 326 | /// |
| 327 | /// Passing a large radix, causing a panic: |
| 328 | /// |
| 329 | /// ```should_panic |
| 330 | /// // this panics |
| 331 | /// '1' .is_digit(37); |
| 332 | /// ``` |
| 333 | /// |
| 334 | /// Passing a small radix, causing a panic: |
| 335 | /// |
| 336 | /// ```should_panic |
| 337 | /// // this panics |
| 338 | /// '1' .is_digit(1); |
| 339 | /// ``` |
| 340 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 341 | #[rustc_const_stable (feature = "const_char_classify" , since = "1.87.0" )] |
| 342 | #[inline ] |
| 343 | pub const fn is_digit(self, radix: u32) -> bool { |
| 344 | self.to_digit(radix).is_some() |
| 345 | } |
| 346 | |
| 347 | /// Converts a `char` to a digit in the given radix. |
| 348 | /// |
| 349 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
| 350 | /// indicates a binary number, a radix of ten, decimal, and a radix of |
| 351 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
| 352 | /// radices are supported. |
| 353 | /// |
| 354 | /// 'Digit' is defined to be only the following characters: |
| 355 | /// |
| 356 | /// * `0-9` |
| 357 | /// * `a-z` |
| 358 | /// * `A-Z` |
| 359 | /// |
| 360 | /// # Errors |
| 361 | /// |
| 362 | /// Returns `None` if the `char` does not refer to a digit in the given radix. |
| 363 | /// |
| 364 | /// # Panics |
| 365 | /// |
| 366 | /// Panics if given a radix smaller than 2 or larger than 36. |
| 367 | /// |
| 368 | /// # Examples |
| 369 | /// |
| 370 | /// Basic usage: |
| 371 | /// |
| 372 | /// ``` |
| 373 | /// assert_eq!('1' .to_digit(10), Some(1)); |
| 374 | /// assert_eq!('f' .to_digit(16), Some(15)); |
| 375 | /// ``` |
| 376 | /// |
| 377 | /// Passing a non-digit results in failure: |
| 378 | /// |
| 379 | /// ``` |
| 380 | /// assert_eq!('f' .to_digit(10), None); |
| 381 | /// assert_eq!('z' .to_digit(16), None); |
| 382 | /// ``` |
| 383 | /// |
| 384 | /// Passing a large radix, causing a panic: |
| 385 | /// |
| 386 | /// ```should_panic |
| 387 | /// // this panics |
| 388 | /// let _ = '1' .to_digit(37); |
| 389 | /// ``` |
| 390 | /// Passing a small radix, causing a panic: |
| 391 | /// |
| 392 | /// ```should_panic |
| 393 | /// // this panics |
| 394 | /// let _ = '1' .to_digit(1); |
| 395 | /// ``` |
| 396 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 397 | #[rustc_const_stable (feature = "const_char_convert" , since = "1.67.0" )] |
| 398 | #[must_use = "this returns the result of the operation, \ |
| 399 | without modifying the original" ] |
| 400 | #[inline ] |
| 401 | pub const fn to_digit(self, radix: u32) -> Option<u32> { |
| 402 | assert!( |
| 403 | radix >= 2 && radix <= 36, |
| 404 | "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive" |
| 405 | ); |
| 406 | // check radix to remove letter handling code when radix is a known constant |
| 407 | let value = if self > '9' && radix > 10 { |
| 408 | // mask to convert ASCII letters to uppercase |
| 409 | const TO_UPPERCASE_MASK: u32 = !0b0010_0000; |
| 410 | // Converts an ASCII letter to its corresponding integer value: |
| 411 | // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36. |
| 412 | // |
| 413 | // Add Overflow Safety: |
| 414 | // By applying the mask after the subtraction, the first addendum is |
| 415 | // constrained such that it never exceeds u32::MAX - 0x20. |
| 416 | ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10 |
| 417 | } else { |
| 418 | // convert digit to value, non-digits wrap to values > 36 |
| 419 | (self as u32).wrapping_sub('0' as u32) |
| 420 | }; |
| 421 | // FIXME(const-hack): once then_some is const fn, use it here |
| 422 | if value < radix { Some(value) } else { None } |
| 423 | } |
| 424 | |
| 425 | /// Returns an iterator that yields the hexadecimal Unicode escape of a |
| 426 | /// character as `char`s. |
| 427 | /// |
| 428 | /// This will escape characters with the Rust syntax of the form |
| 429 | /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation. |
| 430 | /// |
| 431 | /// # Examples |
| 432 | /// |
| 433 | /// As an iterator: |
| 434 | /// |
| 435 | /// ``` |
| 436 | /// for c in '❤' .escape_unicode() { |
| 437 | /// print!("{c}" ); |
| 438 | /// } |
| 439 | /// println!(); |
| 440 | /// ``` |
| 441 | /// |
| 442 | /// Using `println!` directly: |
| 443 | /// |
| 444 | /// ``` |
| 445 | /// println!("{}" , '❤' .escape_unicode()); |
| 446 | /// ``` |
| 447 | /// |
| 448 | /// Both are equivalent to: |
| 449 | /// |
| 450 | /// ``` |
| 451 | /// println!(" \\u{{2764}}" ); |
| 452 | /// ``` |
| 453 | /// |
| 454 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
| 455 | /// |
| 456 | /// ``` |
| 457 | /// assert_eq!('❤' .escape_unicode().to_string(), " \\u{2764}" ); |
| 458 | /// ``` |
| 459 | #[must_use = "this returns the escaped char as an iterator, \ |
| 460 | without modifying the original" ] |
| 461 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 462 | #[inline ] |
| 463 | pub fn escape_unicode(self) -> EscapeUnicode { |
| 464 | EscapeUnicode::new(self) |
| 465 | } |
| 466 | |
| 467 | /// An extended version of `escape_debug` that optionally permits escaping |
| 468 | /// Extended Grapheme codepoints, single quotes, and double quotes. This |
| 469 | /// allows us to format characters like nonspacing marks better when they're |
| 470 | /// at the start of a string, and allows escaping single quotes in |
| 471 | /// characters, and double quotes in strings. |
| 472 | #[inline ] |
| 473 | pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug { |
| 474 | match self { |
| 475 | ' \0' => EscapeDebug::backslash(ascii::Char::Digit0), |
| 476 | ' \t' => EscapeDebug::backslash(ascii::Char::SmallT), |
| 477 | ' \r' => EscapeDebug::backslash(ascii::Char::SmallR), |
| 478 | ' \n' => EscapeDebug::backslash(ascii::Char::SmallN), |
| 479 | ' \\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus), |
| 480 | ' \"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark), |
| 481 | ' \'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe), |
| 482 | _ if args.escape_grapheme_extended && self.is_grapheme_extended() => { |
| 483 | EscapeDebug::unicode(self) |
| 484 | } |
| 485 | _ if is_printable(self) => EscapeDebug::printable(self), |
| 486 | _ => EscapeDebug::unicode(self), |
| 487 | } |
| 488 | } |
| 489 | |
| 490 | /// Returns an iterator that yields the literal escape code of a character |
| 491 | /// as `char`s. |
| 492 | /// |
| 493 | /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations |
| 494 | /// of `str` or `char`. |
| 495 | /// |
| 496 | /// # Examples |
| 497 | /// |
| 498 | /// As an iterator: |
| 499 | /// |
| 500 | /// ``` |
| 501 | /// for c in ' \n' .escape_debug() { |
| 502 | /// print!("{c}" ); |
| 503 | /// } |
| 504 | /// println!(); |
| 505 | /// ``` |
| 506 | /// |
| 507 | /// Using `println!` directly: |
| 508 | /// |
| 509 | /// ``` |
| 510 | /// println!("{}" , ' \n' .escape_debug()); |
| 511 | /// ``` |
| 512 | /// |
| 513 | /// Both are equivalent to: |
| 514 | /// |
| 515 | /// ``` |
| 516 | /// println!(" \\n" ); |
| 517 | /// ``` |
| 518 | /// |
| 519 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
| 520 | /// |
| 521 | /// ``` |
| 522 | /// assert_eq!(' \n' .escape_debug().to_string(), " \\n" ); |
| 523 | /// ``` |
| 524 | #[must_use = "this returns the escaped char as an iterator, \ |
| 525 | without modifying the original" ] |
| 526 | #[stable (feature = "char_escape_debug" , since = "1.20.0" )] |
| 527 | #[inline ] |
| 528 | pub fn escape_debug(self) -> EscapeDebug { |
| 529 | self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL) |
| 530 | } |
| 531 | |
| 532 | /// Returns an iterator that yields the literal escape code of a character |
| 533 | /// as `char`s. |
| 534 | /// |
| 535 | /// The default is chosen with a bias toward producing literals that are |
| 536 | /// legal in a variety of languages, including C++11 and similar C-family |
| 537 | /// languages. The exact rules are: |
| 538 | /// |
| 539 | /// * Tab is escaped as `\t`. |
| 540 | /// * Carriage return is escaped as `\r`. |
| 541 | /// * Line feed is escaped as `\n`. |
| 542 | /// * Single quote is escaped as `\'`. |
| 543 | /// * Double quote is escaped as `\"`. |
| 544 | /// * Backslash is escaped as `\\`. |
| 545 | /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e` |
| 546 | /// inclusive is not escaped. |
| 547 | /// * All other characters are given hexadecimal Unicode escapes; see |
| 548 | /// [`escape_unicode`]. |
| 549 | /// |
| 550 | /// [`escape_unicode`]: #method.escape_unicode |
| 551 | /// |
| 552 | /// # Examples |
| 553 | /// |
| 554 | /// As an iterator: |
| 555 | /// |
| 556 | /// ``` |
| 557 | /// for c in '"' .escape_default() { |
| 558 | /// print!("{c}" ); |
| 559 | /// } |
| 560 | /// println!(); |
| 561 | /// ``` |
| 562 | /// |
| 563 | /// Using `println!` directly: |
| 564 | /// |
| 565 | /// ``` |
| 566 | /// println!("{}" , '"' .escape_default()); |
| 567 | /// ``` |
| 568 | /// |
| 569 | /// Both are equivalent to: |
| 570 | /// |
| 571 | /// ``` |
| 572 | /// println!(" \\\"" ); |
| 573 | /// ``` |
| 574 | /// |
| 575 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
| 576 | /// |
| 577 | /// ``` |
| 578 | /// assert_eq!('"' .escape_default().to_string(), " \\\"" ); |
| 579 | /// ``` |
| 580 | #[must_use = "this returns the escaped char as an iterator, \ |
| 581 | without modifying the original" ] |
| 582 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 583 | #[inline ] |
| 584 | pub fn escape_default(self) -> EscapeDefault { |
| 585 | match self { |
| 586 | ' \t' => EscapeDefault::backslash(ascii::Char::SmallT), |
| 587 | ' \r' => EscapeDefault::backslash(ascii::Char::SmallR), |
| 588 | ' \n' => EscapeDefault::backslash(ascii::Char::SmallN), |
| 589 | ' \\' | ' \'' | ' \"' => EscapeDefault::backslash(self.as_ascii().unwrap()), |
| 590 | ' \x20' ..=' \x7e' => EscapeDefault::printable(self.as_ascii().unwrap()), |
| 591 | _ => EscapeDefault::unicode(self), |
| 592 | } |
| 593 | } |
| 594 | |
| 595 | /// Returns the number of bytes this `char` would need if encoded in UTF-8. |
| 596 | /// |
| 597 | /// That number of bytes is always between 1 and 4, inclusive. |
| 598 | /// |
| 599 | /// # Examples |
| 600 | /// |
| 601 | /// Basic usage: |
| 602 | /// |
| 603 | /// ``` |
| 604 | /// let len = 'A' .len_utf8(); |
| 605 | /// assert_eq!(len, 1); |
| 606 | /// |
| 607 | /// let len = 'ß' .len_utf8(); |
| 608 | /// assert_eq!(len, 2); |
| 609 | /// |
| 610 | /// let len = 'ℝ' .len_utf8(); |
| 611 | /// assert_eq!(len, 3); |
| 612 | /// |
| 613 | /// let len = '💣' .len_utf8(); |
| 614 | /// assert_eq!(len, 4); |
| 615 | /// ``` |
| 616 | /// |
| 617 | /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it |
| 618 | /// would take if each code point was represented as a `char` vs in the `&str` itself: |
| 619 | /// |
| 620 | /// ``` |
| 621 | /// // as chars |
| 622 | /// let eastern = '東' ; |
| 623 | /// let capital = '京' ; |
| 624 | /// |
| 625 | /// // both can be represented as three bytes |
| 626 | /// assert_eq!(3, eastern.len_utf8()); |
| 627 | /// assert_eq!(3, capital.len_utf8()); |
| 628 | /// |
| 629 | /// // as a &str, these two are encoded in UTF-8 |
| 630 | /// let tokyo = "東京" ; |
| 631 | /// |
| 632 | /// let len = eastern.len_utf8() + capital.len_utf8(); |
| 633 | /// |
| 634 | /// // we can see that they take six bytes total... |
| 635 | /// assert_eq!(6, tokyo.len()); |
| 636 | /// |
| 637 | /// // ... just like the &str |
| 638 | /// assert_eq!(len, tokyo.len()); |
| 639 | /// ``` |
| 640 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 641 | #[rustc_const_stable (feature = "const_char_len_utf" , since = "1.52.0" )] |
| 642 | #[inline ] |
| 643 | #[must_use ] |
| 644 | pub const fn len_utf8(self) -> usize { |
| 645 | len_utf8(self as u32) |
| 646 | } |
| 647 | |
| 648 | /// Returns the number of 16-bit code units this `char` would need if |
| 649 | /// encoded in UTF-16. |
| 650 | /// |
| 651 | /// That number of code units is always either 1 or 2, for unicode scalar values in |
| 652 | /// the [basic multilingual plane] or [supplementary planes] respectively. |
| 653 | /// |
| 654 | /// See the documentation for [`len_utf8()`] for more explanation of this |
| 655 | /// concept. This function is a mirror, but for UTF-16 instead of UTF-8. |
| 656 | /// |
| 657 | /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane |
| 658 | /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes |
| 659 | /// [`len_utf8()`]: #method.len_utf8 |
| 660 | /// |
| 661 | /// # Examples |
| 662 | /// |
| 663 | /// Basic usage: |
| 664 | /// |
| 665 | /// ``` |
| 666 | /// let n = 'ß' .len_utf16(); |
| 667 | /// assert_eq!(n, 1); |
| 668 | /// |
| 669 | /// let len = '💣' .len_utf16(); |
| 670 | /// assert_eq!(len, 2); |
| 671 | /// ``` |
| 672 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 673 | #[rustc_const_stable (feature = "const_char_len_utf" , since = "1.52.0" )] |
| 674 | #[inline ] |
| 675 | #[must_use ] |
| 676 | pub const fn len_utf16(self) -> usize { |
| 677 | len_utf16(self as u32) |
| 678 | } |
| 679 | |
| 680 | /// Encodes this character as UTF-8 into the provided byte buffer, |
| 681 | /// and then returns the subslice of the buffer that contains the encoded character. |
| 682 | /// |
| 683 | /// # Panics |
| 684 | /// |
| 685 | /// Panics if the buffer is not large enough. |
| 686 | /// A buffer of length four is large enough to encode any `char`. |
| 687 | /// |
| 688 | /// # Examples |
| 689 | /// |
| 690 | /// In both of these examples, 'ß' takes two bytes to encode. |
| 691 | /// |
| 692 | /// ``` |
| 693 | /// let mut b = [0; 2]; |
| 694 | /// |
| 695 | /// let result = 'ß' .encode_utf8(&mut b); |
| 696 | /// |
| 697 | /// assert_eq!(result, "ß" ); |
| 698 | /// |
| 699 | /// assert_eq!(result.len(), 2); |
| 700 | /// ``` |
| 701 | /// |
| 702 | /// A buffer that's too small: |
| 703 | /// |
| 704 | /// ```should_panic |
| 705 | /// let mut b = [0; 1]; |
| 706 | /// |
| 707 | /// // this panics |
| 708 | /// 'ß' .encode_utf8(&mut b); |
| 709 | /// ``` |
| 710 | #[stable (feature = "unicode_encode_char" , since = "1.15.0" )] |
| 711 | #[rustc_const_stable (feature = "const_char_encode_utf8" , since = "1.83.0" )] |
| 712 | #[inline ] |
| 713 | pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str { |
| 714 | // SAFETY: `char` is not a surrogate, so this is valid UTF-8. |
| 715 | unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) } |
| 716 | } |
| 717 | |
| 718 | /// Encodes this character as native endian UTF-16 into the provided `u16` buffer, |
| 719 | /// and then returns the subslice of the buffer that contains the encoded character. |
| 720 | /// |
| 721 | /// # Panics |
| 722 | /// |
| 723 | /// Panics if the buffer is not large enough. |
| 724 | /// A buffer of length 2 is large enough to encode any `char`. |
| 725 | /// |
| 726 | /// # Examples |
| 727 | /// |
| 728 | /// In both of these examples, '𝕊' takes two `u16`s to encode. |
| 729 | /// |
| 730 | /// ``` |
| 731 | /// let mut b = [0; 2]; |
| 732 | /// |
| 733 | /// let result = '𝕊' .encode_utf16(&mut b); |
| 734 | /// |
| 735 | /// assert_eq!(result.len(), 2); |
| 736 | /// ``` |
| 737 | /// |
| 738 | /// A buffer that's too small: |
| 739 | /// |
| 740 | /// ```should_panic |
| 741 | /// let mut b = [0; 1]; |
| 742 | /// |
| 743 | /// // this panics |
| 744 | /// '𝕊' .encode_utf16(&mut b); |
| 745 | /// ``` |
| 746 | #[stable (feature = "unicode_encode_char" , since = "1.15.0" )] |
| 747 | #[rustc_const_stable (feature = "const_char_encode_utf16" , since = "1.84.0" )] |
| 748 | #[inline ] |
| 749 | pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { |
| 750 | encode_utf16_raw(self as u32, dst) |
| 751 | } |
| 752 | |
| 753 | /// Returns `true` if this `char` has the `Alphabetic` property. |
| 754 | /// |
| 755 | /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
| 756 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
| 757 | /// |
| 758 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
| 759 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 760 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
| 761 | /// |
| 762 | /// # Examples |
| 763 | /// |
| 764 | /// Basic usage: |
| 765 | /// |
| 766 | /// ``` |
| 767 | /// assert!('a' .is_alphabetic()); |
| 768 | /// assert!('京' .is_alphabetic()); |
| 769 | /// |
| 770 | /// let c = '💝' ; |
| 771 | /// // love is many things, but it is not alphabetic |
| 772 | /// assert!(!c.is_alphabetic()); |
| 773 | /// ``` |
| 774 | #[must_use ] |
| 775 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 776 | #[inline ] |
| 777 | pub fn is_alphabetic(self) -> bool { |
| 778 | match self { |
| 779 | 'a' ..='z' | 'A' ..='Z' => true, |
| 780 | c => c > ' \x7f' && unicode::Alphabetic(c), |
| 781 | } |
| 782 | } |
| 783 | |
| 784 | /// Returns `true` if this `char` has the `Lowercase` property. |
| 785 | /// |
| 786 | /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
| 787 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
| 788 | /// |
| 789 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
| 790 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 791 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
| 792 | /// |
| 793 | /// # Examples |
| 794 | /// |
| 795 | /// Basic usage: |
| 796 | /// |
| 797 | /// ``` |
| 798 | /// assert!('a' .is_lowercase()); |
| 799 | /// assert!('δ' .is_lowercase()); |
| 800 | /// assert!(!'A' .is_lowercase()); |
| 801 | /// assert!(!'Δ' .is_lowercase()); |
| 802 | /// |
| 803 | /// // The various Chinese scripts and punctuation do not have case, and so: |
| 804 | /// assert!(!'中' .is_lowercase()); |
| 805 | /// assert!(!' ' .is_lowercase()); |
| 806 | /// ``` |
| 807 | /// |
| 808 | /// In a const context: |
| 809 | /// |
| 810 | /// ``` |
| 811 | /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ' .is_lowercase(); |
| 812 | /// assert!(!CAPITAL_DELTA_IS_LOWERCASE); |
| 813 | /// ``` |
| 814 | #[must_use ] |
| 815 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 816 | #[rustc_const_stable (feature = "const_unicode_case_lookup" , since = "1.84.0" )] |
| 817 | #[inline ] |
| 818 | pub const fn is_lowercase(self) -> bool { |
| 819 | match self { |
| 820 | 'a' ..='z' => true, |
| 821 | c => c > ' \x7f' && unicode::Lowercase(c), |
| 822 | } |
| 823 | } |
| 824 | |
| 825 | /// Returns `true` if this `char` has the `Uppercase` property. |
| 826 | /// |
| 827 | /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and |
| 828 | /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. |
| 829 | /// |
| 830 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
| 831 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 832 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
| 833 | /// |
| 834 | /// # Examples |
| 835 | /// |
| 836 | /// Basic usage: |
| 837 | /// |
| 838 | /// ``` |
| 839 | /// assert!(!'a' .is_uppercase()); |
| 840 | /// assert!(!'δ' .is_uppercase()); |
| 841 | /// assert!('A' .is_uppercase()); |
| 842 | /// assert!('Δ' .is_uppercase()); |
| 843 | /// |
| 844 | /// // The various Chinese scripts and punctuation do not have case, and so: |
| 845 | /// assert!(!'中' .is_uppercase()); |
| 846 | /// assert!(!' ' .is_uppercase()); |
| 847 | /// ``` |
| 848 | /// |
| 849 | /// In a const context: |
| 850 | /// |
| 851 | /// ``` |
| 852 | /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ' .is_uppercase(); |
| 853 | /// assert!(CAPITAL_DELTA_IS_UPPERCASE); |
| 854 | /// ``` |
| 855 | #[must_use ] |
| 856 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 857 | #[rustc_const_stable (feature = "const_unicode_case_lookup" , since = "1.84.0" )] |
| 858 | #[inline ] |
| 859 | pub const fn is_uppercase(self) -> bool { |
| 860 | match self { |
| 861 | 'A' ..='Z' => true, |
| 862 | c => c > ' \x7f' && unicode::Uppercase(c), |
| 863 | } |
| 864 | } |
| 865 | |
| 866 | /// Returns `true` if this `char` has the `White_Space` property. |
| 867 | /// |
| 868 | /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`]. |
| 869 | /// |
| 870 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 871 | /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
| 872 | /// |
| 873 | /// # Examples |
| 874 | /// |
| 875 | /// Basic usage: |
| 876 | /// |
| 877 | /// ``` |
| 878 | /// assert!(' ' .is_whitespace()); |
| 879 | /// |
| 880 | /// // line break |
| 881 | /// assert!(' \n' .is_whitespace()); |
| 882 | /// |
| 883 | /// // a non-breaking space |
| 884 | /// assert!(' \u{A0}' .is_whitespace()); |
| 885 | /// |
| 886 | /// assert!(!'越' .is_whitespace()); |
| 887 | /// ``` |
| 888 | #[must_use ] |
| 889 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 890 | #[rustc_const_stable (feature = "const_char_classify" , since = "1.87.0" )] |
| 891 | #[inline ] |
| 892 | pub const fn is_whitespace(self) -> bool { |
| 893 | match self { |
| 894 | ' ' | ' \x09' ..=' \x0d' => true, |
| 895 | c => c > ' \x7f' && unicode::White_Space(c), |
| 896 | } |
| 897 | } |
| 898 | |
| 899 | /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`]. |
| 900 | /// |
| 901 | /// [`is_alphabetic()`]: #method.is_alphabetic |
| 902 | /// [`is_numeric()`]: #method.is_numeric |
| 903 | /// |
| 904 | /// # Examples |
| 905 | /// |
| 906 | /// Basic usage: |
| 907 | /// |
| 908 | /// ``` |
| 909 | /// assert!('٣' .is_alphanumeric()); |
| 910 | /// assert!('7' .is_alphanumeric()); |
| 911 | /// assert!('৬' .is_alphanumeric()); |
| 912 | /// assert!('¾' .is_alphanumeric()); |
| 913 | /// assert!('①' .is_alphanumeric()); |
| 914 | /// assert!('K' .is_alphanumeric()); |
| 915 | /// assert!('و' .is_alphanumeric()); |
| 916 | /// assert!('藏' .is_alphanumeric()); |
| 917 | /// ``` |
| 918 | #[must_use ] |
| 919 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 920 | #[inline ] |
| 921 | pub fn is_alphanumeric(self) -> bool { |
| 922 | self.is_alphabetic() || self.is_numeric() |
| 923 | } |
| 924 | |
| 925 | /// Returns `true` if this `char` has the general category for control codes. |
| 926 | /// |
| 927 | /// Control codes (code points with the general category of `Cc`) are described in Chapter 4 |
| 928 | /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character |
| 929 | /// Database][ucd] [`UnicodeData.txt`]. |
| 930 | /// |
| 931 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
| 932 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 933 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
| 934 | /// |
| 935 | /// # Examples |
| 936 | /// |
| 937 | /// Basic usage: |
| 938 | /// |
| 939 | /// ``` |
| 940 | /// // U+009C, STRING TERMINATOR |
| 941 | /// assert!('' .is_control()); |
| 942 | /// assert!(!'q' .is_control()); |
| 943 | /// ``` |
| 944 | #[must_use ] |
| 945 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 946 | #[inline ] |
| 947 | pub fn is_control(self) -> bool { |
| 948 | unicode::Cc(self) |
| 949 | } |
| 950 | |
| 951 | /// Returns `true` if this `char` has the `Grapheme_Extend` property. |
| 952 | /// |
| 953 | /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text |
| 954 | /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd] |
| 955 | /// [`DerivedCoreProperties.txt`]. |
| 956 | /// |
| 957 | /// [uax29]: https://www.unicode.org/reports/tr29/ |
| 958 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 959 | /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
| 960 | #[must_use ] |
| 961 | #[inline ] |
| 962 | pub(crate) fn is_grapheme_extended(self) -> bool { |
| 963 | unicode::Grapheme_Extend(self) |
| 964 | } |
| 965 | |
| 966 | /// Returns `true` if this `char` has one of the general categories for numbers. |
| 967 | /// |
| 968 | /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric |
| 969 | /// characters, and `No` for other numeric characters) are specified in the [Unicode Character |
| 970 | /// Database][ucd] [`UnicodeData.txt`]. |
| 971 | /// |
| 972 | /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'. |
| 973 | /// If you want everything including characters with overlapping purposes then you might want to use |
| 974 | /// a unicode or language-processing library that exposes the appropriate character properties instead |
| 975 | /// of looking at the unicode categories. |
| 976 | /// |
| 977 | /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use |
| 978 | /// `is_ascii_digit` or `is_digit` instead. |
| 979 | /// |
| 980 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
| 981 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 982 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
| 983 | /// |
| 984 | /// # Examples |
| 985 | /// |
| 986 | /// Basic usage: |
| 987 | /// |
| 988 | /// ``` |
| 989 | /// assert!('٣' .is_numeric()); |
| 990 | /// assert!('7' .is_numeric()); |
| 991 | /// assert!('৬' .is_numeric()); |
| 992 | /// assert!('¾' .is_numeric()); |
| 993 | /// assert!('①' .is_numeric()); |
| 994 | /// assert!(!'K' .is_numeric()); |
| 995 | /// assert!(!'و' .is_numeric()); |
| 996 | /// assert!(!'藏' .is_numeric()); |
| 997 | /// assert!(!'三' .is_numeric()); |
| 998 | /// ``` |
| 999 | #[must_use ] |
| 1000 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 1001 | #[inline ] |
| 1002 | pub fn is_numeric(self) -> bool { |
| 1003 | match self { |
| 1004 | '0' ..='9' => true, |
| 1005 | c => c > ' \x7f' && unicode::N(c), |
| 1006 | } |
| 1007 | } |
| 1008 | |
| 1009 | /// Returns an iterator that yields the lowercase mapping of this `char` as one or more |
| 1010 | /// `char`s. |
| 1011 | /// |
| 1012 | /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`. |
| 1013 | /// |
| 1014 | /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character |
| 1015 | /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. |
| 1016 | /// |
| 1017 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 1018 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
| 1019 | /// |
| 1020 | /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields |
| 1021 | /// the `char`(s) given by [`SpecialCasing.txt`]. |
| 1022 | /// |
| 1023 | /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt |
| 1024 | /// |
| 1025 | /// This operation performs an unconditional mapping without tailoring. That is, the conversion |
| 1026 | /// is independent of context and language. |
| 1027 | /// |
| 1028 | /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in |
| 1029 | /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion. |
| 1030 | /// |
| 1031 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
| 1032 | /// |
| 1033 | /// # Examples |
| 1034 | /// |
| 1035 | /// As an iterator: |
| 1036 | /// |
| 1037 | /// ``` |
| 1038 | /// for c in 'İ' .to_lowercase() { |
| 1039 | /// print!("{c}" ); |
| 1040 | /// } |
| 1041 | /// println!(); |
| 1042 | /// ``` |
| 1043 | /// |
| 1044 | /// Using `println!` directly: |
| 1045 | /// |
| 1046 | /// ``` |
| 1047 | /// println!("{}" , 'İ' .to_lowercase()); |
| 1048 | /// ``` |
| 1049 | /// |
| 1050 | /// Both are equivalent to: |
| 1051 | /// |
| 1052 | /// ``` |
| 1053 | /// println!("i \u{307}" ); |
| 1054 | /// ``` |
| 1055 | /// |
| 1056 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
| 1057 | /// |
| 1058 | /// ``` |
| 1059 | /// assert_eq!('C' .to_lowercase().to_string(), "c" ); |
| 1060 | /// |
| 1061 | /// // Sometimes the result is more than one character: |
| 1062 | /// assert_eq!('İ' .to_lowercase().to_string(), "i \u{307}" ); |
| 1063 | /// |
| 1064 | /// // Characters that do not have both uppercase and lowercase |
| 1065 | /// // convert into themselves. |
| 1066 | /// assert_eq!('山' .to_lowercase().to_string(), "山" ); |
| 1067 | /// ``` |
| 1068 | #[must_use = "this returns the lowercase character as a new iterator, \ |
| 1069 | without modifying the original" ] |
| 1070 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 1071 | #[inline ] |
| 1072 | pub fn to_lowercase(self) -> ToLowercase { |
| 1073 | ToLowercase(CaseMappingIter::new(conversions::to_lower(self))) |
| 1074 | } |
| 1075 | |
| 1076 | /// Returns an iterator that yields the uppercase mapping of this `char` as one or more |
| 1077 | /// `char`s. |
| 1078 | /// |
| 1079 | /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`. |
| 1080 | /// |
| 1081 | /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character |
| 1082 | /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. |
| 1083 | /// |
| 1084 | /// [ucd]: https://www.unicode.org/reports/tr44/ |
| 1085 | /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
| 1086 | /// |
| 1087 | /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields |
| 1088 | /// the `char`(s) given by [`SpecialCasing.txt`]. |
| 1089 | /// |
| 1090 | /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt |
| 1091 | /// |
| 1092 | /// This operation performs an unconditional mapping without tailoring. That is, the conversion |
| 1093 | /// is independent of context and language. |
| 1094 | /// |
| 1095 | /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in |
| 1096 | /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion. |
| 1097 | /// |
| 1098 | /// [Unicode Standard]: https://www.unicode.org/versions/latest/ |
| 1099 | /// |
| 1100 | /// # Examples |
| 1101 | /// |
| 1102 | /// As an iterator: |
| 1103 | /// |
| 1104 | /// ``` |
| 1105 | /// for c in 'ß' .to_uppercase() { |
| 1106 | /// print!("{c}" ); |
| 1107 | /// } |
| 1108 | /// println!(); |
| 1109 | /// ``` |
| 1110 | /// |
| 1111 | /// Using `println!` directly: |
| 1112 | /// |
| 1113 | /// ``` |
| 1114 | /// println!("{}" , 'ß' .to_uppercase()); |
| 1115 | /// ``` |
| 1116 | /// |
| 1117 | /// Both are equivalent to: |
| 1118 | /// |
| 1119 | /// ``` |
| 1120 | /// println!("SS" ); |
| 1121 | /// ``` |
| 1122 | /// |
| 1123 | /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): |
| 1124 | /// |
| 1125 | /// ``` |
| 1126 | /// assert_eq!('c' .to_uppercase().to_string(), "C" ); |
| 1127 | /// |
| 1128 | /// // Sometimes the result is more than one character: |
| 1129 | /// assert_eq!('ß' .to_uppercase().to_string(), "SS" ); |
| 1130 | /// |
| 1131 | /// // Characters that do not have both uppercase and lowercase |
| 1132 | /// // convert into themselves. |
| 1133 | /// assert_eq!('山' .to_uppercase().to_string(), "山" ); |
| 1134 | /// ``` |
| 1135 | /// |
| 1136 | /// # Note on locale |
| 1137 | /// |
| 1138 | /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two: |
| 1139 | /// |
| 1140 | /// * 'Dotless': I / ı, sometimes written ï |
| 1141 | /// * 'Dotted': İ / i |
| 1142 | /// |
| 1143 | /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore: |
| 1144 | /// |
| 1145 | /// ``` |
| 1146 | /// let upper_i = 'i' .to_uppercase().to_string(); |
| 1147 | /// ``` |
| 1148 | /// |
| 1149 | /// The value of `upper_i` here relies on the language of the text: if we're |
| 1150 | /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should |
| 1151 | /// be `"İ"`. `to_uppercase()` does not take this into account, and so: |
| 1152 | /// |
| 1153 | /// ``` |
| 1154 | /// let upper_i = 'i' .to_uppercase().to_string(); |
| 1155 | /// |
| 1156 | /// assert_eq!(upper_i, "I" ); |
| 1157 | /// ``` |
| 1158 | /// |
| 1159 | /// holds across languages. |
| 1160 | #[must_use = "this returns the uppercase character as a new iterator, \ |
| 1161 | without modifying the original" ] |
| 1162 | #[stable (feature = "rust1" , since = "1.0.0" )] |
| 1163 | #[inline ] |
| 1164 | pub fn to_uppercase(self) -> ToUppercase { |
| 1165 | ToUppercase(CaseMappingIter::new(conversions::to_upper(self))) |
| 1166 | } |
| 1167 | |
| 1168 | /// Checks if the value is within the ASCII range. |
| 1169 | /// |
| 1170 | /// # Examples |
| 1171 | /// |
| 1172 | /// ``` |
| 1173 | /// let ascii = 'a' ; |
| 1174 | /// let non_ascii = '❤' ; |
| 1175 | /// |
| 1176 | /// assert!(ascii.is_ascii()); |
| 1177 | /// assert!(!non_ascii.is_ascii()); |
| 1178 | /// ``` |
| 1179 | #[must_use ] |
| 1180 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
| 1181 | #[rustc_const_stable (feature = "const_char_is_ascii" , since = "1.32.0" )] |
| 1182 | #[rustc_diagnostic_item = "char_is_ascii" ] |
| 1183 | #[inline ] |
| 1184 | pub const fn is_ascii(&self) -> bool { |
| 1185 | *self as u32 <= 0x7F |
| 1186 | } |
| 1187 | |
| 1188 | /// Returns `Some` if the value is within the ASCII range, |
| 1189 | /// or `None` if it's not. |
| 1190 | /// |
| 1191 | /// This is preferred to [`Self::is_ascii`] when you're passing the value |
| 1192 | /// along to something else that can take [`ascii::Char`] rather than |
| 1193 | /// needing to check again for itself whether the value is in ASCII. |
| 1194 | #[must_use ] |
| 1195 | #[unstable (feature = "ascii_char" , issue = "110998" )] |
| 1196 | #[inline ] |
| 1197 | pub const fn as_ascii(&self) -> Option<ascii::Char> { |
| 1198 | if self.is_ascii() { |
| 1199 | // SAFETY: Just checked that this is ASCII. |
| 1200 | Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) }) |
| 1201 | } else { |
| 1202 | None |
| 1203 | } |
| 1204 | } |
| 1205 | |
| 1206 | /// Converts this char into an [ASCII character](`ascii::Char`), without |
| 1207 | /// checking whether it is valid. |
| 1208 | /// |
| 1209 | /// # Safety |
| 1210 | /// |
| 1211 | /// This char must be within the ASCII range, or else this is UB. |
| 1212 | #[must_use ] |
| 1213 | #[unstable (feature = "ascii_char" , issue = "110998" )] |
| 1214 | #[inline ] |
| 1215 | pub const unsafe fn as_ascii_unchecked(&self) -> ascii::Char { |
| 1216 | assert_unsafe_precondition!( |
| 1217 | check_library_ub, |
| 1218 | "as_ascii_unchecked requires that the char is valid ASCII" , |
| 1219 | (it: &char = self) => it.is_ascii() |
| 1220 | ); |
| 1221 | |
| 1222 | // SAFETY: the caller promised that this char is ASCII. |
| 1223 | unsafe { ascii::Char::from_u8_unchecked(*self as u8) } |
| 1224 | } |
| 1225 | |
| 1226 | /// Makes a copy of the value in its ASCII upper case equivalent. |
| 1227 | /// |
| 1228 | /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', |
| 1229 | /// but non-ASCII letters are unchanged. |
| 1230 | /// |
| 1231 | /// To uppercase the value in-place, use [`make_ascii_uppercase()`]. |
| 1232 | /// |
| 1233 | /// To uppercase ASCII characters in addition to non-ASCII characters, use |
| 1234 | /// [`to_uppercase()`]. |
| 1235 | /// |
| 1236 | /// # Examples |
| 1237 | /// |
| 1238 | /// ``` |
| 1239 | /// let ascii = 'a' ; |
| 1240 | /// let non_ascii = '❤' ; |
| 1241 | /// |
| 1242 | /// assert_eq!('A' , ascii.to_ascii_uppercase()); |
| 1243 | /// assert_eq!('❤' , non_ascii.to_ascii_uppercase()); |
| 1244 | /// ``` |
| 1245 | /// |
| 1246 | /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase |
| 1247 | /// [`to_uppercase()`]: #method.to_uppercase |
| 1248 | #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`" ] |
| 1249 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
| 1250 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
| 1251 | #[inline ] |
| 1252 | pub const fn to_ascii_uppercase(&self) -> char { |
| 1253 | if self.is_ascii_lowercase() { |
| 1254 | (*self as u8).ascii_change_case_unchecked() as char |
| 1255 | } else { |
| 1256 | *self |
| 1257 | } |
| 1258 | } |
| 1259 | |
| 1260 | /// Makes a copy of the value in its ASCII lower case equivalent. |
| 1261 | /// |
| 1262 | /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', |
| 1263 | /// but non-ASCII letters are unchanged. |
| 1264 | /// |
| 1265 | /// To lowercase the value in-place, use [`make_ascii_lowercase()`]. |
| 1266 | /// |
| 1267 | /// To lowercase ASCII characters in addition to non-ASCII characters, use |
| 1268 | /// [`to_lowercase()`]. |
| 1269 | /// |
| 1270 | /// # Examples |
| 1271 | /// |
| 1272 | /// ``` |
| 1273 | /// let ascii = 'A' ; |
| 1274 | /// let non_ascii = '❤' ; |
| 1275 | /// |
| 1276 | /// assert_eq!('a' , ascii.to_ascii_lowercase()); |
| 1277 | /// assert_eq!('❤' , non_ascii.to_ascii_lowercase()); |
| 1278 | /// ``` |
| 1279 | /// |
| 1280 | /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase |
| 1281 | /// [`to_lowercase()`]: #method.to_lowercase |
| 1282 | #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`" ] |
| 1283 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
| 1284 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
| 1285 | #[inline ] |
| 1286 | pub const fn to_ascii_lowercase(&self) -> char { |
| 1287 | if self.is_ascii_uppercase() { |
| 1288 | (*self as u8).ascii_change_case_unchecked() as char |
| 1289 | } else { |
| 1290 | *self |
| 1291 | } |
| 1292 | } |
| 1293 | |
| 1294 | /// Checks that two values are an ASCII case-insensitive match. |
| 1295 | /// |
| 1296 | /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>. |
| 1297 | /// |
| 1298 | /// # Examples |
| 1299 | /// |
| 1300 | /// ``` |
| 1301 | /// let upper_a = 'A' ; |
| 1302 | /// let lower_a = 'a' ; |
| 1303 | /// let lower_z = 'z' ; |
| 1304 | /// |
| 1305 | /// assert!(upper_a.eq_ignore_ascii_case(&lower_a)); |
| 1306 | /// assert!(upper_a.eq_ignore_ascii_case(&upper_a)); |
| 1307 | /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z)); |
| 1308 | /// ``` |
| 1309 | /// |
| 1310 | /// [to_ascii_lowercase]: #method.to_ascii_lowercase |
| 1311 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
| 1312 | #[rustc_const_stable (feature = "const_ascii_methods_on_intrinsics" , since = "1.52.0" )] |
| 1313 | #[inline ] |
| 1314 | pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool { |
| 1315 | self.to_ascii_lowercase() == other.to_ascii_lowercase() |
| 1316 | } |
| 1317 | |
| 1318 | /// Converts this type to its ASCII upper case equivalent in-place. |
| 1319 | /// |
| 1320 | /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', |
| 1321 | /// but non-ASCII letters are unchanged. |
| 1322 | /// |
| 1323 | /// To return a new uppercased value without modifying the existing one, use |
| 1324 | /// [`to_ascii_uppercase()`]. |
| 1325 | /// |
| 1326 | /// # Examples |
| 1327 | /// |
| 1328 | /// ``` |
| 1329 | /// let mut ascii = 'a' ; |
| 1330 | /// |
| 1331 | /// ascii.make_ascii_uppercase(); |
| 1332 | /// |
| 1333 | /// assert_eq!('A' , ascii); |
| 1334 | /// ``` |
| 1335 | /// |
| 1336 | /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase |
| 1337 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
| 1338 | #[rustc_const_stable (feature = "const_make_ascii" , since = "1.84.0" )] |
| 1339 | #[inline ] |
| 1340 | pub const fn make_ascii_uppercase(&mut self) { |
| 1341 | *self = self.to_ascii_uppercase(); |
| 1342 | } |
| 1343 | |
| 1344 | /// Converts this type to its ASCII lower case equivalent in-place. |
| 1345 | /// |
| 1346 | /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', |
| 1347 | /// but non-ASCII letters are unchanged. |
| 1348 | /// |
| 1349 | /// To return a new lowercased value without modifying the existing one, use |
| 1350 | /// [`to_ascii_lowercase()`]. |
| 1351 | /// |
| 1352 | /// # Examples |
| 1353 | /// |
| 1354 | /// ``` |
| 1355 | /// let mut ascii = 'A' ; |
| 1356 | /// |
| 1357 | /// ascii.make_ascii_lowercase(); |
| 1358 | /// |
| 1359 | /// assert_eq!('a' , ascii); |
| 1360 | /// ``` |
| 1361 | /// |
| 1362 | /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase |
| 1363 | #[stable (feature = "ascii_methods_on_intrinsics" , since = "1.23.0" )] |
| 1364 | #[rustc_const_stable (feature = "const_make_ascii" , since = "1.84.0" )] |
| 1365 | #[inline ] |
| 1366 | pub const fn make_ascii_lowercase(&mut self) { |
| 1367 | *self = self.to_ascii_lowercase(); |
| 1368 | } |
| 1369 | |
| 1370 | /// Checks if the value is an ASCII alphabetic character: |
| 1371 | /// |
| 1372 | /// - U+0041 'A' ..= U+005A 'Z', or |
| 1373 | /// - U+0061 'a' ..= U+007A 'z'. |
| 1374 | /// |
| 1375 | /// # Examples |
| 1376 | /// |
| 1377 | /// ``` |
| 1378 | /// let uppercase_a = 'A' ; |
| 1379 | /// let uppercase_g = 'G' ; |
| 1380 | /// let a = 'a' ; |
| 1381 | /// let g = 'g' ; |
| 1382 | /// let zero = '0' ; |
| 1383 | /// let percent = '%' ; |
| 1384 | /// let space = ' ' ; |
| 1385 | /// let lf = ' \n' ; |
| 1386 | /// let esc = ' \x1b' ; |
| 1387 | /// |
| 1388 | /// assert!(uppercase_a.is_ascii_alphabetic()); |
| 1389 | /// assert!(uppercase_g.is_ascii_alphabetic()); |
| 1390 | /// assert!(a.is_ascii_alphabetic()); |
| 1391 | /// assert!(g.is_ascii_alphabetic()); |
| 1392 | /// assert!(!zero.is_ascii_alphabetic()); |
| 1393 | /// assert!(!percent.is_ascii_alphabetic()); |
| 1394 | /// assert!(!space.is_ascii_alphabetic()); |
| 1395 | /// assert!(!lf.is_ascii_alphabetic()); |
| 1396 | /// assert!(!esc.is_ascii_alphabetic()); |
| 1397 | /// ``` |
| 1398 | #[must_use ] |
| 1399 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1400 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1401 | #[inline ] |
| 1402 | pub const fn is_ascii_alphabetic(&self) -> bool { |
| 1403 | matches!(*self, 'A' ..='Z' | 'a' ..='z' ) |
| 1404 | } |
| 1405 | |
| 1406 | /// Checks if the value is an ASCII uppercase character: |
| 1407 | /// U+0041 'A' ..= U+005A 'Z'. |
| 1408 | /// |
| 1409 | /// # Examples |
| 1410 | /// |
| 1411 | /// ``` |
| 1412 | /// let uppercase_a = 'A' ; |
| 1413 | /// let uppercase_g = 'G' ; |
| 1414 | /// let a = 'a' ; |
| 1415 | /// let g = 'g' ; |
| 1416 | /// let zero = '0' ; |
| 1417 | /// let percent = '%' ; |
| 1418 | /// let space = ' ' ; |
| 1419 | /// let lf = ' \n' ; |
| 1420 | /// let esc = ' \x1b' ; |
| 1421 | /// |
| 1422 | /// assert!(uppercase_a.is_ascii_uppercase()); |
| 1423 | /// assert!(uppercase_g.is_ascii_uppercase()); |
| 1424 | /// assert!(!a.is_ascii_uppercase()); |
| 1425 | /// assert!(!g.is_ascii_uppercase()); |
| 1426 | /// assert!(!zero.is_ascii_uppercase()); |
| 1427 | /// assert!(!percent.is_ascii_uppercase()); |
| 1428 | /// assert!(!space.is_ascii_uppercase()); |
| 1429 | /// assert!(!lf.is_ascii_uppercase()); |
| 1430 | /// assert!(!esc.is_ascii_uppercase()); |
| 1431 | /// ``` |
| 1432 | #[must_use ] |
| 1433 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1434 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1435 | #[inline ] |
| 1436 | pub const fn is_ascii_uppercase(&self) -> bool { |
| 1437 | matches!(*self, 'A' ..='Z' ) |
| 1438 | } |
| 1439 | |
| 1440 | /// Checks if the value is an ASCII lowercase character: |
| 1441 | /// U+0061 'a' ..= U+007A 'z'. |
| 1442 | /// |
| 1443 | /// # Examples |
| 1444 | /// |
| 1445 | /// ``` |
| 1446 | /// let uppercase_a = 'A' ; |
| 1447 | /// let uppercase_g = 'G' ; |
| 1448 | /// let a = 'a' ; |
| 1449 | /// let g = 'g' ; |
| 1450 | /// let zero = '0' ; |
| 1451 | /// let percent = '%' ; |
| 1452 | /// let space = ' ' ; |
| 1453 | /// let lf = ' \n' ; |
| 1454 | /// let esc = ' \x1b' ; |
| 1455 | /// |
| 1456 | /// assert!(!uppercase_a.is_ascii_lowercase()); |
| 1457 | /// assert!(!uppercase_g.is_ascii_lowercase()); |
| 1458 | /// assert!(a.is_ascii_lowercase()); |
| 1459 | /// assert!(g.is_ascii_lowercase()); |
| 1460 | /// assert!(!zero.is_ascii_lowercase()); |
| 1461 | /// assert!(!percent.is_ascii_lowercase()); |
| 1462 | /// assert!(!space.is_ascii_lowercase()); |
| 1463 | /// assert!(!lf.is_ascii_lowercase()); |
| 1464 | /// assert!(!esc.is_ascii_lowercase()); |
| 1465 | /// ``` |
| 1466 | #[must_use ] |
| 1467 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1468 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1469 | #[inline ] |
| 1470 | pub const fn is_ascii_lowercase(&self) -> bool { |
| 1471 | matches!(*self, 'a' ..='z' ) |
| 1472 | } |
| 1473 | |
| 1474 | /// Checks if the value is an ASCII alphanumeric character: |
| 1475 | /// |
| 1476 | /// - U+0041 'A' ..= U+005A 'Z', or |
| 1477 | /// - U+0061 'a' ..= U+007A 'z', or |
| 1478 | /// - U+0030 '0' ..= U+0039 '9'. |
| 1479 | /// |
| 1480 | /// # Examples |
| 1481 | /// |
| 1482 | /// ``` |
| 1483 | /// let uppercase_a = 'A' ; |
| 1484 | /// let uppercase_g = 'G' ; |
| 1485 | /// let a = 'a' ; |
| 1486 | /// let g = 'g' ; |
| 1487 | /// let zero = '0' ; |
| 1488 | /// let percent = '%' ; |
| 1489 | /// let space = ' ' ; |
| 1490 | /// let lf = ' \n' ; |
| 1491 | /// let esc = ' \x1b' ; |
| 1492 | /// |
| 1493 | /// assert!(uppercase_a.is_ascii_alphanumeric()); |
| 1494 | /// assert!(uppercase_g.is_ascii_alphanumeric()); |
| 1495 | /// assert!(a.is_ascii_alphanumeric()); |
| 1496 | /// assert!(g.is_ascii_alphanumeric()); |
| 1497 | /// assert!(zero.is_ascii_alphanumeric()); |
| 1498 | /// assert!(!percent.is_ascii_alphanumeric()); |
| 1499 | /// assert!(!space.is_ascii_alphanumeric()); |
| 1500 | /// assert!(!lf.is_ascii_alphanumeric()); |
| 1501 | /// assert!(!esc.is_ascii_alphanumeric()); |
| 1502 | /// ``` |
| 1503 | #[must_use ] |
| 1504 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1505 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1506 | #[inline ] |
| 1507 | pub const fn is_ascii_alphanumeric(&self) -> bool { |
| 1508 | matches!(*self, '0' ..='9' ) | matches!(*self, 'A' ..='Z' ) | matches!(*self, 'a' ..='z' ) |
| 1509 | } |
| 1510 | |
| 1511 | /// Checks if the value is an ASCII decimal digit: |
| 1512 | /// U+0030 '0' ..= U+0039 '9'. |
| 1513 | /// |
| 1514 | /// # Examples |
| 1515 | /// |
| 1516 | /// ``` |
| 1517 | /// let uppercase_a = 'A' ; |
| 1518 | /// let uppercase_g = 'G' ; |
| 1519 | /// let a = 'a' ; |
| 1520 | /// let g = 'g' ; |
| 1521 | /// let zero = '0' ; |
| 1522 | /// let percent = '%' ; |
| 1523 | /// let space = ' ' ; |
| 1524 | /// let lf = ' \n' ; |
| 1525 | /// let esc = ' \x1b' ; |
| 1526 | /// |
| 1527 | /// assert!(!uppercase_a.is_ascii_digit()); |
| 1528 | /// assert!(!uppercase_g.is_ascii_digit()); |
| 1529 | /// assert!(!a.is_ascii_digit()); |
| 1530 | /// assert!(!g.is_ascii_digit()); |
| 1531 | /// assert!(zero.is_ascii_digit()); |
| 1532 | /// assert!(!percent.is_ascii_digit()); |
| 1533 | /// assert!(!space.is_ascii_digit()); |
| 1534 | /// assert!(!lf.is_ascii_digit()); |
| 1535 | /// assert!(!esc.is_ascii_digit()); |
| 1536 | /// ``` |
| 1537 | #[must_use ] |
| 1538 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1539 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1540 | #[inline ] |
| 1541 | pub const fn is_ascii_digit(&self) -> bool { |
| 1542 | matches!(*self, '0' ..='9' ) |
| 1543 | } |
| 1544 | |
| 1545 | /// Checks if the value is an ASCII octal digit: |
| 1546 | /// U+0030 '0' ..= U+0037 '7'. |
| 1547 | /// |
| 1548 | /// # Examples |
| 1549 | /// |
| 1550 | /// ``` |
| 1551 | /// #![feature(is_ascii_octdigit)] |
| 1552 | /// |
| 1553 | /// let uppercase_a = 'A' ; |
| 1554 | /// let a = 'a' ; |
| 1555 | /// let zero = '0' ; |
| 1556 | /// let seven = '7' ; |
| 1557 | /// let nine = '9' ; |
| 1558 | /// let percent = '%' ; |
| 1559 | /// let lf = ' \n' ; |
| 1560 | /// |
| 1561 | /// assert!(!uppercase_a.is_ascii_octdigit()); |
| 1562 | /// assert!(!a.is_ascii_octdigit()); |
| 1563 | /// assert!(zero.is_ascii_octdigit()); |
| 1564 | /// assert!(seven.is_ascii_octdigit()); |
| 1565 | /// assert!(!nine.is_ascii_octdigit()); |
| 1566 | /// assert!(!percent.is_ascii_octdigit()); |
| 1567 | /// assert!(!lf.is_ascii_octdigit()); |
| 1568 | /// ``` |
| 1569 | #[must_use ] |
| 1570 | #[unstable (feature = "is_ascii_octdigit" , issue = "101288" )] |
| 1571 | #[inline ] |
| 1572 | pub const fn is_ascii_octdigit(&self) -> bool { |
| 1573 | matches!(*self, '0' ..='7' ) |
| 1574 | } |
| 1575 | |
| 1576 | /// Checks if the value is an ASCII hexadecimal digit: |
| 1577 | /// |
| 1578 | /// - U+0030 '0' ..= U+0039 '9', or |
| 1579 | /// - U+0041 'A' ..= U+0046 'F', or |
| 1580 | /// - U+0061 'a' ..= U+0066 'f'. |
| 1581 | /// |
| 1582 | /// # Examples |
| 1583 | /// |
| 1584 | /// ``` |
| 1585 | /// let uppercase_a = 'A' ; |
| 1586 | /// let uppercase_g = 'G' ; |
| 1587 | /// let a = 'a' ; |
| 1588 | /// let g = 'g' ; |
| 1589 | /// let zero = '0' ; |
| 1590 | /// let percent = '%' ; |
| 1591 | /// let space = ' ' ; |
| 1592 | /// let lf = ' \n' ; |
| 1593 | /// let esc = ' \x1b' ; |
| 1594 | /// |
| 1595 | /// assert!(uppercase_a.is_ascii_hexdigit()); |
| 1596 | /// assert!(!uppercase_g.is_ascii_hexdigit()); |
| 1597 | /// assert!(a.is_ascii_hexdigit()); |
| 1598 | /// assert!(!g.is_ascii_hexdigit()); |
| 1599 | /// assert!(zero.is_ascii_hexdigit()); |
| 1600 | /// assert!(!percent.is_ascii_hexdigit()); |
| 1601 | /// assert!(!space.is_ascii_hexdigit()); |
| 1602 | /// assert!(!lf.is_ascii_hexdigit()); |
| 1603 | /// assert!(!esc.is_ascii_hexdigit()); |
| 1604 | /// ``` |
| 1605 | #[must_use ] |
| 1606 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1607 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1608 | #[inline ] |
| 1609 | pub const fn is_ascii_hexdigit(&self) -> bool { |
| 1610 | matches!(*self, '0' ..='9' ) | matches!(*self, 'A' ..='F' ) | matches!(*self, 'a' ..='f' ) |
| 1611 | } |
| 1612 | |
| 1613 | /// Checks if the value is an ASCII punctuation character: |
| 1614 | /// |
| 1615 | /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or |
| 1616 | /// - U+003A ..= U+0040 `: ; < = > ? @`, or |
| 1617 | /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or |
| 1618 | /// - U+007B ..= U+007E `{ | } ~` |
| 1619 | /// |
| 1620 | /// # Examples |
| 1621 | /// |
| 1622 | /// ``` |
| 1623 | /// let uppercase_a = 'A' ; |
| 1624 | /// let uppercase_g = 'G' ; |
| 1625 | /// let a = 'a' ; |
| 1626 | /// let g = 'g' ; |
| 1627 | /// let zero = '0' ; |
| 1628 | /// let percent = '%' ; |
| 1629 | /// let space = ' ' ; |
| 1630 | /// let lf = ' \n' ; |
| 1631 | /// let esc = ' \x1b' ; |
| 1632 | /// |
| 1633 | /// assert!(!uppercase_a.is_ascii_punctuation()); |
| 1634 | /// assert!(!uppercase_g.is_ascii_punctuation()); |
| 1635 | /// assert!(!a.is_ascii_punctuation()); |
| 1636 | /// assert!(!g.is_ascii_punctuation()); |
| 1637 | /// assert!(!zero.is_ascii_punctuation()); |
| 1638 | /// assert!(percent.is_ascii_punctuation()); |
| 1639 | /// assert!(!space.is_ascii_punctuation()); |
| 1640 | /// assert!(!lf.is_ascii_punctuation()); |
| 1641 | /// assert!(!esc.is_ascii_punctuation()); |
| 1642 | /// ``` |
| 1643 | #[must_use ] |
| 1644 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1645 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1646 | #[inline ] |
| 1647 | pub const fn is_ascii_punctuation(&self) -> bool { |
| 1648 | matches!(*self, '!' ..='/' ) |
| 1649 | | matches!(*self, ':' ..='@' ) |
| 1650 | | matches!(*self, '[' ..='`' ) |
| 1651 | | matches!(*self, '{' ..='~' ) |
| 1652 | } |
| 1653 | |
| 1654 | /// Checks if the value is an ASCII graphic character: |
| 1655 | /// U+0021 '!' ..= U+007E '~'. |
| 1656 | /// |
| 1657 | /// # Examples |
| 1658 | /// |
| 1659 | /// ``` |
| 1660 | /// let uppercase_a = 'A' ; |
| 1661 | /// let uppercase_g = 'G' ; |
| 1662 | /// let a = 'a' ; |
| 1663 | /// let g = 'g' ; |
| 1664 | /// let zero = '0' ; |
| 1665 | /// let percent = '%' ; |
| 1666 | /// let space = ' ' ; |
| 1667 | /// let lf = ' \n' ; |
| 1668 | /// let esc = ' \x1b' ; |
| 1669 | /// |
| 1670 | /// assert!(uppercase_a.is_ascii_graphic()); |
| 1671 | /// assert!(uppercase_g.is_ascii_graphic()); |
| 1672 | /// assert!(a.is_ascii_graphic()); |
| 1673 | /// assert!(g.is_ascii_graphic()); |
| 1674 | /// assert!(zero.is_ascii_graphic()); |
| 1675 | /// assert!(percent.is_ascii_graphic()); |
| 1676 | /// assert!(!space.is_ascii_graphic()); |
| 1677 | /// assert!(!lf.is_ascii_graphic()); |
| 1678 | /// assert!(!esc.is_ascii_graphic()); |
| 1679 | /// ``` |
| 1680 | #[must_use ] |
| 1681 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1682 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1683 | #[inline ] |
| 1684 | pub const fn is_ascii_graphic(&self) -> bool { |
| 1685 | matches!(*self, '!' ..='~' ) |
| 1686 | } |
| 1687 | |
| 1688 | /// Checks if the value is an ASCII whitespace character: |
| 1689 | /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, |
| 1690 | /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. |
| 1691 | /// |
| 1692 | /// Rust uses the WhatWG Infra Standard's [definition of ASCII |
| 1693 | /// whitespace][infra-aw]. There are several other definitions in |
| 1694 | /// wide use. For instance, [the POSIX locale][pct] includes |
| 1695 | /// U+000B VERTICAL TAB as well as all the above characters, |
| 1696 | /// but—from the very same specification—[the default rule for |
| 1697 | /// "field splitting" in the Bourne shell][bfs] considers *only* |
| 1698 | /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. |
| 1699 | /// |
| 1700 | /// If you are writing a program that will process an existing |
| 1701 | /// file format, check what that format's definition of whitespace is |
| 1702 | /// before using this function. |
| 1703 | /// |
| 1704 | /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace |
| 1705 | /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 |
| 1706 | /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 |
| 1707 | /// |
| 1708 | /// # Examples |
| 1709 | /// |
| 1710 | /// ``` |
| 1711 | /// let uppercase_a = 'A' ; |
| 1712 | /// let uppercase_g = 'G' ; |
| 1713 | /// let a = 'a' ; |
| 1714 | /// let g = 'g' ; |
| 1715 | /// let zero = '0' ; |
| 1716 | /// let percent = '%' ; |
| 1717 | /// let space = ' ' ; |
| 1718 | /// let lf = ' \n' ; |
| 1719 | /// let esc = ' \x1b' ; |
| 1720 | /// |
| 1721 | /// assert!(!uppercase_a.is_ascii_whitespace()); |
| 1722 | /// assert!(!uppercase_g.is_ascii_whitespace()); |
| 1723 | /// assert!(!a.is_ascii_whitespace()); |
| 1724 | /// assert!(!g.is_ascii_whitespace()); |
| 1725 | /// assert!(!zero.is_ascii_whitespace()); |
| 1726 | /// assert!(!percent.is_ascii_whitespace()); |
| 1727 | /// assert!(space.is_ascii_whitespace()); |
| 1728 | /// assert!(lf.is_ascii_whitespace()); |
| 1729 | /// assert!(!esc.is_ascii_whitespace()); |
| 1730 | /// ``` |
| 1731 | #[must_use ] |
| 1732 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1733 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1734 | #[inline ] |
| 1735 | pub const fn is_ascii_whitespace(&self) -> bool { |
| 1736 | matches!(*self, ' \t' | ' \n' | ' \x0C' | ' \r' | ' ' ) |
| 1737 | } |
| 1738 | |
| 1739 | /// Checks if the value is an ASCII control character: |
| 1740 | /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE. |
| 1741 | /// Note that most ASCII whitespace characters are control |
| 1742 | /// characters, but SPACE is not. |
| 1743 | /// |
| 1744 | /// # Examples |
| 1745 | /// |
| 1746 | /// ``` |
| 1747 | /// let uppercase_a = 'A' ; |
| 1748 | /// let uppercase_g = 'G' ; |
| 1749 | /// let a = 'a' ; |
| 1750 | /// let g = 'g' ; |
| 1751 | /// let zero = '0' ; |
| 1752 | /// let percent = '%' ; |
| 1753 | /// let space = ' ' ; |
| 1754 | /// let lf = ' \n' ; |
| 1755 | /// let esc = ' \x1b' ; |
| 1756 | /// |
| 1757 | /// assert!(!uppercase_a.is_ascii_control()); |
| 1758 | /// assert!(!uppercase_g.is_ascii_control()); |
| 1759 | /// assert!(!a.is_ascii_control()); |
| 1760 | /// assert!(!g.is_ascii_control()); |
| 1761 | /// assert!(!zero.is_ascii_control()); |
| 1762 | /// assert!(!percent.is_ascii_control()); |
| 1763 | /// assert!(!space.is_ascii_control()); |
| 1764 | /// assert!(lf.is_ascii_control()); |
| 1765 | /// assert!(esc.is_ascii_control()); |
| 1766 | /// ``` |
| 1767 | #[must_use ] |
| 1768 | #[stable (feature = "ascii_ctype_on_intrinsics" , since = "1.24.0" )] |
| 1769 | #[rustc_const_stable (feature = "const_ascii_ctype_on_intrinsics" , since = "1.47.0" )] |
| 1770 | #[inline ] |
| 1771 | pub const fn is_ascii_control(&self) -> bool { |
| 1772 | matches!(*self, ' \0' ..=' \x1F' | ' \x7F' ) |
| 1773 | } |
| 1774 | } |
| 1775 | |
/// A set of boolean switches, each enabling one optional kind of escaping.
pub(crate) struct EscapeDebugExtArgs {
    /// Whether extended grapheme code points should be escaped.
    pub(crate) escape_grapheme_extended: bool,

    /// Whether single quotes should be escaped.
    pub(crate) escape_single_quote: bool,

    /// Whether double quotes should be escaped.
    pub(crate) escape_double_quote: bool,
}

impl EscapeDebugExtArgs {
    /// Configuration with every switch turned on.
    pub(crate) const ESCAPE_ALL: Self = EscapeDebugExtArgs {
        escape_double_quote: true,
        escape_single_quote: true,
        escape_grapheme_extended: true,
    };
}
| 1794 | |
| 1795 | #[inline ] |
| 1796 | #[must_use ] |
| 1797 | const fn len_utf8(code: u32) -> usize { |
| 1798 | match code { |
| 1799 | ..MAX_ONE_B => 1, |
| 1800 | ..MAX_TWO_B => 2, |
| 1801 | ..MAX_THREE_B => 3, |
| 1802 | _ => 4, |
| 1803 | } |
| 1804 | } |
| 1805 | |
/// Returns how many `u16` units (1 or 2) are used to encode `code` as UTF-16:
/// one if `code` fits in 16 bits, two otherwise.
#[inline]
#[must_use]
const fn len_utf16(code: u32) -> usize {
    match code {
        0..=0xFFFF => 1,
        _ => 2,
    }
}
| 1811 | |
| 1812 | /// Encodes a raw `u32` value as UTF-8 into the provided byte buffer, |
| 1813 | /// and then returns the subslice of the buffer that contains the encoded character. |
| 1814 | /// |
| 1815 | /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range. |
| 1816 | /// (Creating a `char` in the surrogate range is UB.) |
| 1817 | /// The result is valid [generalized UTF-8] but not valid UTF-8. |
| 1818 | /// |
| 1819 | /// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8 |
| 1820 | /// |
| 1821 | /// # Panics |
| 1822 | /// |
| 1823 | /// Panics if the buffer is not large enough. |
| 1824 | /// A buffer of length four is large enough to encode any `char`. |
| 1825 | #[unstable (feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" )] |
| 1826 | #[doc (hidden)] |
| 1827 | #[inline ] |
| 1828 | pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] { |
| 1829 | let len: usize = len_utf8(code); |
| 1830 | if dst.len() < len { |
| 1831 | const_panic!( |
| 1832 | "encode_utf8: buffer does not have enough bytes to encode code point" , |
| 1833 | "encode_utf8: need {len} bytes to encode U+ {code:04X} but buffer has just {dst_len}" , |
| 1834 | code: u32 = code, |
| 1835 | len: usize = len, |
| 1836 | dst_len: usize = dst.len(), |
| 1837 | ); |
| 1838 | } |
| 1839 | |
| 1840 | // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint. |
| 1841 | unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) }; |
| 1842 | |
| 1843 | // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds. |
| 1844 | unsafe { slice::from_raw_parts_mut(data:dst.as_mut_ptr(), len) } |
| 1845 | } |
| 1846 | |
| 1847 | /// Encodes a raw `u32` value as UTF-8 into the byte buffer pointed to by `dst`. |
| 1848 | /// |
| 1849 | /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range. |
| 1850 | /// (Creating a `char` in the surrogate range is UB.) |
| 1851 | /// The result is valid [generalized UTF-8] but not valid UTF-8. |
| 1852 | /// |
| 1853 | /// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8 |
| 1854 | /// |
| 1855 | /// # Safety |
| 1856 | /// |
| 1857 | /// The behavior is undefined if the buffer pointed to by `dst` is not |
| 1858 | /// large enough to hold the encoded codepoint. A buffer of length four |
| 1859 | /// is large enough to encode any `char`. |
| 1860 | /// |
| 1861 | /// For a safe version of this function, see the [`encode_utf8_raw`] function. |
| 1862 | #[unstable (feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" )] |
| 1863 | #[doc (hidden)] |
| 1864 | #[inline ] |
| 1865 | pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) { |
| 1866 | let len = len_utf8(code); |
| 1867 | // SAFETY: The caller must guarantee that the buffer pointed to by `dst` |
| 1868 | // is at least `len` bytes long. |
| 1869 | unsafe { |
| 1870 | match len { |
| 1871 | 1 => { |
| 1872 | *dst = code as u8; |
| 1873 | } |
| 1874 | 2 => { |
| 1875 | *dst = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
| 1876 | *dst.add(1) = (code & 0x3F) as u8 | TAG_CONT; |
| 1877 | } |
| 1878 | 3 => { |
| 1879 | *dst = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
| 1880 | *dst.add(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
| 1881 | *dst.add(2) = (code & 0x3F) as u8 | TAG_CONT; |
| 1882 | } |
| 1883 | 4 => { |
| 1884 | *dst = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
| 1885 | *dst.add(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
| 1886 | *dst.add(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
| 1887 | *dst.add(3) = (code & 0x3F) as u8 | TAG_CONT; |
| 1888 | } |
| 1889 | // SAFETY: `char` always takes between 1 and 4 bytes to encode in UTF-8. |
| 1890 | _ => crate::hint::unreachable_unchecked(), |
| 1891 | } |
| 1892 | } |
| 1893 | } |
| 1894 | |
| 1895 | /// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer, |
| 1896 | /// and then returns the subslice of the buffer that contains the encoded character. |
| 1897 | /// |
| 1898 | /// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range. |
| 1899 | /// (Creating a `char` in the surrogate range is UB.) |
| 1900 | /// |
| 1901 | /// # Panics |
| 1902 | /// |
| 1903 | /// Panics if the buffer is not large enough. |
| 1904 | /// A buffer of length 2 is large enough to encode any `char`. |
| 1905 | #[unstable (feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" )] |
| 1906 | #[doc (hidden)] |
| 1907 | #[inline ] |
| 1908 | pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] { |
| 1909 | let len: usize = len_utf16(code); |
| 1910 | match (len, &mut *dst) { |
| 1911 | (1, [a: &mut u16, ..]) => { |
| 1912 | *a = code as u16; |
| 1913 | } |
| 1914 | (2, [a: &mut u16, b: &mut u16, ..]) => { |
| 1915 | code -= 0x1_0000; |
| 1916 | *a = (code >> 10) as u16 | 0xD800; |
| 1917 | *b = (code & 0x3FF) as u16 | 0xDC00; |
| 1918 | } |
| 1919 | _ => { |
| 1920 | const_panic!( |
| 1921 | "encode_utf16: buffer does not have enough bytes to encode code point" , |
| 1922 | "encode_utf16: need {len} bytes to encode U+ {code:04X} but buffer has just {dst_len}" , |
| 1923 | code: u32 = code, |
| 1924 | len: usize = len, |
| 1925 | dst_len: usize = dst.len(), |
| 1926 | ) |
| 1927 | } |
| 1928 | }; |
| 1929 | // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds. |
| 1930 | unsafe { slice::from_raw_parts_mut(data:dst.as_mut_ptr(), len) } |
| 1931 | } |
| 1932 | |