| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | use crate::asciibyte::AsciiByte; |
| 6 | use crate::int_ops::{Aligned4, Aligned8}; |
| 7 | use crate::TinyStrError; |
| 8 | use core::fmt; |
| 9 | use core::ops::Deref; |
| 10 | use core::str::{self, FromStr}; |
| 11 | |
/// A fixed-capacity ASCII string stored inline in `N` bytes.
///
/// The safe constructors in this file leave every position after the content
/// filled with NUL, and reject (or replace) NUL bytes inside the content.
#[repr(transparent)]
#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
pub struct TinyAsciiStr<const N: usize> {
    // Raw storage: content bytes first, then NUL padding up to `N`.
    bytes: [AsciiByte; N],
}
| 17 | |
| 18 | impl<const N: usize> TinyAsciiStr<N> { |
| 19 | /// Creates a `TinyAsciiStr<N>` from the given byte slice. |
| 20 | /// `bytes` may contain at most `N` non-null ASCII bytes. |
| 21 | pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> { |
| 22 | Self::from_bytes_inner(bytes, 0, bytes.len(), false) |
| 23 | } |
| 24 | |
| 25 | /// Creates a `TinyAsciiStr<N>` from a byte slice, replacing invalid bytes. |
| 26 | /// |
| 27 | /// Null and non-ASCII bytes (i.e. those outside the range `0x01..=0x7F`) |
| 28 | /// will be replaced with the '?' character. |
| 29 | /// |
| 30 | /// The input slice will be truncated if its length exceeds `N`. |
| 31 | pub const fn from_bytes_lossy(bytes: &[u8]) -> Self { |
| 32 | const QUESTION: u8 = b'?' ; |
| 33 | let mut out = [0; N]; |
| 34 | let mut i = 0; |
| 35 | // Ord is not available in const, so no `.min(N)` |
| 36 | let len = if bytes.len() > N { N } else { bytes.len() }; |
| 37 | |
| 38 | // Indexing is protected by the len check above |
| 39 | #[allow (clippy::indexing_slicing)] |
| 40 | while i < len { |
| 41 | let b = bytes[i]; |
| 42 | if b > 0 && b < 0x80 { |
| 43 | out[i] = b; |
| 44 | } else { |
| 45 | out[i] = QUESTION; |
| 46 | } |
| 47 | i += 1; |
| 48 | } |
| 49 | |
| 50 | Self { |
| 51 | // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` |
| 52 | bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`. |
| 57 | /// |
| 58 | /// The byte array may contain trailing NUL bytes. |
| 59 | /// |
| 60 | /// # Example |
| 61 | /// |
| 62 | /// ``` |
| 63 | /// use tinystr::tinystr; |
| 64 | /// use tinystr::TinyAsciiStr; |
| 65 | /// |
| 66 | /// assert_eq!( |
| 67 | /// TinyAsciiStr::<3>::try_from_raw(*b"GB \0" ), |
| 68 | /// Ok(tinystr!(3, "GB" )) |
| 69 | /// ); |
| 70 | /// assert_eq!( |
| 71 | /// TinyAsciiStr::<3>::try_from_raw(*b"USD" ), |
| 72 | /// Ok(tinystr!(3, "USD" )) |
| 73 | /// ); |
| 74 | /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b" \0A \0" ), Err(_))); |
| 75 | /// ``` |
| 76 | pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> { |
| 77 | Self::from_bytes_inner(&raw, 0, N, true) |
| 78 | } |
| 79 | |
| 80 | /// Equivalent to [`from_bytes(bytes[start..end])`](Self::from_bytes), |
| 81 | /// but callable in a `const` context (which range indexing is not). |
| 82 | pub const fn from_bytes_manual_slice( |
| 83 | bytes: &[u8], |
| 84 | start: usize, |
| 85 | end: usize, |
| 86 | ) -> Result<Self, TinyStrError> { |
| 87 | Self::from_bytes_inner(bytes, start, end, false) |
| 88 | } |
| 89 | |
| 90 | #[inline ] |
| 91 | pub(crate) const fn from_bytes_inner( |
| 92 | bytes: &[u8], |
| 93 | start: usize, |
| 94 | end: usize, |
| 95 | allow_trailing_null: bool, |
| 96 | ) -> Result<Self, TinyStrError> { |
| 97 | let len = end - start; |
| 98 | if len > N { |
| 99 | return Err(TinyStrError::TooLarge { max: N, len }); |
| 100 | } |
| 101 | |
| 102 | let mut out = [0; N]; |
| 103 | let mut i = 0; |
| 104 | let mut found_null = false; |
| 105 | // Indexing is protected by TinyStrError::TooLarge |
| 106 | #[allow (clippy::indexing_slicing)] |
| 107 | while i < len { |
| 108 | let b = bytes[start + i]; |
| 109 | |
| 110 | if b == 0 { |
| 111 | found_null = true; |
| 112 | } else if b >= 0x80 { |
| 113 | return Err(TinyStrError::NonAscii); |
| 114 | } else if found_null { |
| 115 | // Error if there are contentful bytes after null |
| 116 | return Err(TinyStrError::ContainsNull); |
| 117 | } |
| 118 | out[i] = b; |
| 119 | |
| 120 | i += 1; |
| 121 | } |
| 122 | |
| 123 | if !allow_trailing_null && found_null { |
| 124 | // We found some trailing nulls, error |
| 125 | return Err(TinyStrError::ContainsNull); |
| 126 | } |
| 127 | |
| 128 | Ok(Self { |
| 129 | // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` |
| 130 | bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, |
| 131 | }) |
| 132 | } |
| 133 | |
| 134 | // TODO: This function shadows the FromStr trait. Rename? |
| 135 | #[inline ] |
| 136 | pub const fn from_str(s: &str) -> Result<Self, TinyStrError> { |
| 137 | Self::from_bytes_inner(s.as_bytes(), 0, s.len(), false) |
| 138 | } |
| 139 | |
| 140 | #[inline ] |
| 141 | pub const fn as_str(&self) -> &str { |
| 142 | // as_bytes is valid utf8 |
| 143 | unsafe { str::from_utf8_unchecked(self.as_bytes()) } |
| 144 | } |
| 145 | |
| 146 | #[inline ] |
| 147 | #[must_use ] |
| 148 | pub const fn len(&self) -> usize { |
| 149 | if N <= 4 { |
| 150 | Aligned4::from_ascii_bytes(&self.bytes).len() |
| 151 | } else if N <= 8 { |
| 152 | Aligned8::from_ascii_bytes(&self.bytes).len() |
| 153 | } else { |
| 154 | let mut i = 0; |
| 155 | #[allow (clippy::indexing_slicing)] // < N is safe |
| 156 | while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 { |
| 157 | i += 1 |
| 158 | } |
| 159 | i |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | #[inline ] |
| 164 | #[must_use ] |
| 165 | pub const fn is_empty(&self) -> bool { |
| 166 | self.bytes[0] as u8 == AsciiByte::B0 as u8 |
| 167 | } |
| 168 | |
| 169 | #[inline ] |
| 170 | #[must_use ] |
| 171 | pub const fn as_bytes(&self) -> &[u8] { |
| 172 | // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`, |
| 173 | // and changing the length of that slice to self.len() < N is safe. |
| 174 | unsafe { |
| 175 | core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len()) |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | #[inline ] |
| 180 | #[must_use ] |
| 181 | pub const fn all_bytes(&self) -> &[u8; N] { |
| 182 | // SAFETY: `self.bytes` has same size as [u8; N] |
| 183 | unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) } |
| 184 | } |
| 185 | |
| 186 | #[inline ] |
| 187 | #[must_use ] |
| 188 | /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`. |
| 189 | /// |
| 190 | /// If `M < len()` the string gets truncated, otherwise only the |
| 191 | /// memory representation changes. |
| 192 | pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> { |
| 193 | let mut bytes = [0; M]; |
| 194 | let mut i = 0; |
| 195 | // Indexing is protected by the loop guard |
| 196 | #[allow (clippy::indexing_slicing)] |
| 197 | while i < M && i < N { |
| 198 | bytes[i] = self.bytes[i] as u8; |
| 199 | i += 1; |
| 200 | } |
| 201 | // `self.bytes` only contains ASCII bytes, with no null bytes between |
| 202 | // ASCII characters, so this also holds for `bytes`. |
| 203 | unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) } |
| 204 | } |
| 205 | |
| 206 | /// # Safety |
| 207 | /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes |
| 208 | /// between ASCII characters |
| 209 | #[must_use ] |
| 210 | pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self { |
| 211 | Self { |
| 212 | bytes: AsciiByte::to_ascii_byte_array(&bytes), |
| 213 | } |
| 214 | } |
| 215 | } |
| 216 | |
// Expands to the body of an `is_ascii_*` predicate on `TinyAsciiStr<N>`.
//
// Every arm dispatches on the capacity: `N <= 4` and `N <= 8` call `$check_int`
// on the `Aligned4` / `Aligned8` view of the bytes; larger capacities walk the
// bytes up to the first NUL and apply per-byte `u8` predicates.
//
// Arms:
// 1. `(self, check_int, check_u8)`: every byte must satisfy `check_u8`.
// 2. `(self, check_int, !first, !rest)`: the first byte must NOT satisfy
//    `first`, and no later byte may satisfy `rest`.
// 3. `(self, check_int, first, rest)`: the first byte must satisfy `first`,
//    and every later byte must satisfy `rest`.
//
// NOTE(review): for `N > 8`, arm 3 inspects `bytes[0]` before looking for a
// NUL, so an empty string yields `false`, whereas arm 1 yields `true` for an
// empty string. Confirm this matches what the `Aligned4` / `Aligned8`
// implementations return for empty input.
macro_rules! check_is {
    ($self:ident, $check_int:ident, $check_u8:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            let mut i = 0;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if ($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            // The first byte was handled above, so the scan starts at index 1.
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if ($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if !($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            // The first byte was handled above, so the scan starts at index 1.
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
}
| 281 | |
| 282 | impl<const N: usize> TinyAsciiStr<N> { |
/// Checks if the value is composed of ASCII alphabetic characters:
///
/// * U+0041 'A' ..= U+005A 'Z', or
/// * U+0061 'a' ..= U+007A 'z'.
///
/// Returns `false` as soon as any other character is present.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
///
/// assert!(s1.is_ascii_alphabetic());
/// assert!(!s2.is_ascii_alphabetic());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphabetic(&self) -> bool {
    check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
}
| 304 | |
/// Checks if the value is composed of ASCII alphanumeric characters:
///
/// * U+0041 'A' ..= U+005A 'Z', or
/// * U+0061 'a' ..= U+007A 'z', or
/// * U+0030 '0' ..= U+0039 '9'.
///
/// Returns `false` as soon as any other character is present.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
///
/// assert!(s1.is_ascii_alphanumeric());
/// assert!(!s2.is_ascii_alphanumeric());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphanumeric(&self) -> bool {
    check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
}
| 327 | |
/// Checks if the value is composed of ASCII decimal digits:
///
/// * U+0030 '0' ..= U+0039 '9'.
///
/// Returns `false` as soon as any other character is present.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
///
/// assert!(s1.is_ascii_numeric());
/// assert!(!s2.is_ascii_numeric());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_numeric(&self) -> bool {
    check_is!(self, is_ascii_numeric, is_ascii_digit)
}
| 348 | |
/// Checks if the value is in ASCII lower case.
///
/// All letter characters are checked for case. Non-letter characters are ignored,
/// so the check fails only if an ASCII uppercase letter is present.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_lowercase());
/// assert!(s2.is_ascii_lowercase());
/// assert!(s3.is_ascii_lowercase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_lowercase(&self) -> bool {
    check_is!(
        self,
        is_ascii_lowercase,
        !is_ascii_uppercase,
        !is_ascii_uppercase
    )
}
| 376 | |
/// Checks if the value is in ASCII title case.
///
/// This verifies that the first character is not ASCII lowercase and that no
/// later character is ASCII uppercase. Non-letter characters are ignored.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_titlecase());
/// assert!(s2.is_ascii_titlecase());
/// assert!(s3.is_ascii_titlecase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_titlecase(&self) -> bool {
    check_is!(
        self,
        is_ascii_titlecase,
        !is_ascii_lowercase,
        !is_ascii_uppercase
    )
}
| 405 | |
/// Checks if the value is in ASCII upper case.
///
/// All letter characters are checked for case. Non-letter characters are ignored,
/// so the check fails only if an ASCII lowercase letter is present.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_uppercase());
/// assert!(s2.is_ascii_uppercase());
/// assert!(!s3.is_ascii_uppercase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_uppercase(&self) -> bool {
    check_is!(
        self,
        is_ascii_uppercase,
        !is_ascii_lowercase,
        !is_ascii_lowercase
    )
}
| 433 | |
/// Checks if the value is composed of ASCII alphabetic lower case characters:
///
/// * U+0061 'a' ..= U+007A 'z'.
///
/// Unlike [`Self::is_ascii_lowercase`], non-letter characters make the check fail.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
/// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_alphabetic_lowercase());
/// assert!(!s2.is_ascii_alphabetic_lowercase());
/// assert!(!s3.is_ascii_alphabetic_lowercase());
/// assert!(s4.is_ascii_alphabetic_lowercase());
/// assert!(!s5.is_ascii_alphabetic_lowercase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
    check_is!(
        self,
        is_ascii_alphabetic_lowercase,
        is_ascii_lowercase,
        is_ascii_lowercase
    )
}
| 465 | |
/// Checks if the value is composed of ASCII alphabetic characters, with the
/// first character being ASCII uppercase and all others ASCII lowercase.
///
/// Unlike [`Self::is_ascii_titlecase`], non-letter characters make the check fail.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
/// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(s1.is_ascii_alphabetic_titlecase());
/// assert!(!s2.is_ascii_alphabetic_titlecase());
/// assert!(!s3.is_ascii_alphabetic_titlecase());
/// assert!(!s4.is_ascii_alphabetic_titlecase());
/// assert!(!s5.is_ascii_alphabetic_titlecase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
    check_is!(
        self,
        is_ascii_alphabetic_titlecase,
        is_ascii_uppercase,
        is_ascii_lowercase
    )
}
| 495 | |
/// Checks if the value is composed of ASCII alphabetic upper case characters:
///
/// * U+0041 'A' ..= U+005A 'Z'.
///
/// Unlike [`Self::is_ascii_uppercase`], non-letter characters make the check fail.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
/// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_alphabetic_uppercase());
/// assert!(!s2.is_ascii_alphabetic_uppercase());
/// assert!(!s3.is_ascii_alphabetic_uppercase());
/// assert!(s4.is_ascii_alphabetic_uppercase());
/// assert!(!s5.is_ascii_alphabetic_uppercase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
    check_is!(
        self,
        is_ascii_alphabetic_uppercase,
        is_ascii_uppercase,
        is_ascii_uppercase
    )
}
| 527 | } |
| 528 | |
// Expands to the body of a `to_ascii_*` case conversion on `TinyAsciiStr<N>`.
//
// * `$self`: the (mutable, by-value) string being converted; it is also the
//   value of the expansion.
// * `$to`: conversion method called on the `Aligned4` / `Aligned8` view when
//   `N <= 4` / `N <= 8`.
// * `$later_char_to`: `u8` conversion applied to every byte up to the first
//   NUL when `N > 8`.
// * `$first_char_to` (optional): `u8` conversion applied to the first byte
//   afterwards when `N > 8`, overriding what `$later_char_to` did to it.
macro_rules! to {
    ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
        let mut i = 0;
        if N <= 4 {
            let aligned = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and aligned has length >= N
            #[allow(clippy::indexing_slicing)]
            while i < N {
                $self.bytes[i] = aligned[i];
                i += 1;
            }
        } else if N <= 8 {
            let aligned = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and aligned has length >= N
            #[allow(clippy::indexing_slicing)]
            while i < N {
                $self.bytes[i] = aligned[i];
                i += 1;
            }
        } else {
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                unsafe {
                    $self.bytes[i] = core::mem::transmute::<u8, AsciiByte>(
                        ($self.bytes[i] as u8).$later_char_to()
                    );
                }
                i += 1;
            }
            // Index 0 exists because this branch is only reached when N > 8.
            // SAFETY: AsciiByte is repr(u8) and has same size as u8
            $(
                $self.bytes[0] = unsafe {
                    core::mem::transmute::<u8, AsciiByte>(($self.bytes[0] as u8).$first_char_to())
                };
            )?
        }
        $self
    }};
}
| 570 | |
| 571 | impl<const N: usize> TinyAsciiStr<N> { |
/// Returns this string converted to its ASCII lower case equivalent.
///
/// The method takes `self` by value and returns the converted string; the
/// caller's original value is not modified.
///
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
///
/// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
/// ```
#[inline]
#[must_use]
pub const fn to_ascii_lowercase(mut self) -> Self {
    to!(self, to_ascii_lowercase, to_ascii_lowercase)
}
| 590 | |
/// Returns this string converted to its ASCII title case equivalent.
///
/// The method takes `self` by value and returns the converted string; the
/// caller's original value is not modified.
///
/// The first character is converted to ASCII uppercase; the remaining characters
/// are converted to ASCII lowercase.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
///
/// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
/// ```
#[inline]
#[must_use]
pub const fn to_ascii_titlecase(mut self) -> Self {
    to!(
        self,
        to_ascii_titlecase,
        to_ascii_lowercase,
        to_ascii_uppercase
    )
}
| 615 | |
/// Returns this string converted to its ASCII upper case equivalent.
///
/// The method takes `self` by value and returns the converted string; the
/// caller's original value is not modified.
///
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
///
/// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
/// ```
#[inline]
#[must_use]
pub const fn to_ascii_uppercase(mut self) -> Self {
    to!(self, to_ascii_uppercase, to_ascii_uppercase)
}
| 634 | } |
| 635 | |
| 636 | impl<const N: usize> fmt::Debug for TinyAsciiStr<N> { |
| 637 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 638 | fmt::Debug::fmt(self.as_str(), f) |
| 639 | } |
| 640 | } |
| 641 | |
| 642 | impl<const N: usize> fmt::Display for TinyAsciiStr<N> { |
| 643 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 644 | fmt::Display::fmt(self.as_str(), f) |
| 645 | } |
| 646 | } |
| 647 | |
| 648 | impl<const N: usize> Deref for TinyAsciiStr<N> { |
| 649 | type Target = str; |
| 650 | #[inline ] |
| 651 | fn deref(&self) -> &str { |
| 652 | self.as_str() |
| 653 | } |
| 654 | } |
| 655 | |
| 656 | impl<const N: usize> FromStr for TinyAsciiStr<N> { |
| 657 | type Err = TinyStrError; |
| 658 | #[inline ] |
| 659 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
| 660 | Self::from_str(s) |
| 661 | } |
| 662 | } |
| 663 | |
| 664 | impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> { |
| 665 | fn eq(&self, other: &str) -> bool { |
| 666 | self.deref() == other |
| 667 | } |
| 668 | } |
| 669 | |
| 670 | impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> { |
| 671 | fn eq(&self, other: &&str) -> bool { |
| 672 | self.deref() == *other |
| 673 | } |
| 674 | } |
| 675 | |
/// Compares the string content with an owned `String`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        self.as_str() == other.as_str()
    }
}
| 682 | |
/// Compares an owned `String` with the content of a `TinyAsciiStr`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        self.as_str() == other.as_str()
    }
}
| 689 | |
| 690 | #[cfg (test)] |
| 691 | mod test { |
| 692 | use super::*; |
| 693 | use rand::distributions::Distribution; |
| 694 | use rand::distributions::Standard; |
| 695 | use rand::rngs::SmallRng; |
| 696 | use rand::seq::SliceRandom; |
| 697 | use rand::SeedableRng; |
| 698 | |
// Hand-picked fixtures fed to every `check_operation` run: mixed-case words,
// digit/letter combinations, punctuation, and lengths from 2 to 16 bytes.
const STRINGS: [&str; 26] = [
    "Latn",
    "laTn",
    "windows",
    "AR",
    "Hans",
    "macos",
    "AT",
    "infiniband",
    "FR",
    "en",
    "Cyrl",
    "FromIntegral",
    "NO",
    "419",
    "MacintoshOSX2019",
    "a3z",
    "A3z",
    "A3Z",
    "a3Z",
    "3A",
    "3Z",
    "3a",
    "3z",
    "@@[`{",
    "UK",
    "E12",
];
| 727 | |
| 728 | fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> { |
| 729 | let mut rng = SmallRng::seed_from_u64(2022); |
| 730 | // Need to do this in 2 steps since the RNG is needed twice |
| 731 | let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap()) |
| 732 | .take(num_strings) |
| 733 | .collect::<Vec<usize>>(); |
| 734 | string_lengths |
| 735 | .iter() |
| 736 | .map(|len| { |
| 737 | Standard |
| 738 | .sample_iter(&mut rng) |
| 739 | .filter(|b: &u8| *b > 0 && *b < 0x80) |
| 740 | .take(*len) |
| 741 | .collect::<Vec<u8>>() |
| 742 | }) |
| 743 | .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII" )) |
| 744 | .collect() |
| 745 | } |
| 746 | |
| 747 | fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2) |
| 748 | where |
| 749 | F1: Fn(&str) -> T, |
| 750 | F2: Fn(TinyAsciiStr<N>) -> T, |
| 751 | T: core::fmt::Debug + core::cmp::PartialEq, |
| 752 | { |
| 753 | for s in STRINGS |
| 754 | .into_iter() |
| 755 | .map(str::to_owned) |
| 756 | .chain(gen_strings(100, &[3, 4, 5, 8, 12])) |
| 757 | { |
| 758 | let t = match TinyAsciiStr::<N>::from_str(&s) { |
| 759 | Ok(t) => t, |
| 760 | Err(TinyStrError::TooLarge { .. }) => continue, |
| 761 | Err(e) => panic!("{}" , e), |
| 762 | }; |
| 763 | let expected = reference_f(&s); |
| 764 | let actual = tinystr_f(t); |
| 765 | assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}" ); |
| 766 | } |
| 767 | } |
| 768 | |
/// `is_ascii_alphabetic` must agree with the per-`char` check at every capacity.
#[test]
fn test_is_ascii_alphabetic() {
    fn verify<const N: usize>() {
        let reference = |s: &str| s.chars().all(|c| c.is_ascii_alphabetic());
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 784 | |
/// `is_ascii_alphanumeric` must agree with the per-`char` check at every capacity.
#[test]
fn test_is_ascii_alphanumeric() {
    fn verify<const N: usize>() {
        let reference = |s: &str| s.chars().all(|c| c.is_ascii_alphanumeric());
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphanumeric(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 800 | |
/// `is_ascii_numeric` must agree with the per-`char` digit check at every capacity.
#[test]
fn test_is_ascii_numeric() {
    fn verify<const N: usize>() {
        let reference = |s: &str| s.chars().all(|c| c.is_ascii_digit());
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_numeric(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 816 | |
/// A string is lowercase exactly when lowercasing it changes nothing.
#[test]
fn test_is_ascii_lowercase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| {
            let lowered = TinyAsciiStr::<16>::from_str(s)
                .unwrap()
                .to_ascii_lowercase();
            s == lowered.as_str()
        };
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_lowercase(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 837 | |
/// A string is titlecase exactly when titlecasing it changes nothing.
#[test]
fn test_is_ascii_titlecase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| {
            let titled = TinyAsciiStr::<16>::from_str(s)
                .unwrap()
                .to_ascii_titlecase();
            s == titled.as_str()
        };
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_titlecase(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 858 | |
/// A string is uppercase exactly when uppercasing it changes nothing.
#[test]
fn test_is_ascii_uppercase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| {
            let raised = TinyAsciiStr::<16>::from_str(s)
                .unwrap()
                .to_ascii_uppercase();
            s == raised.as_str()
        };
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_uppercase(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 879 | |
/// Alphabetic-lowercase means: all letters, and unchanged by lowercasing.
#[test]
fn test_is_ascii_alphabetic_lowercase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| {
            // Letters only...
            if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                return false;
            }
            // ...and already in lower case.
            let lowered = TinyAsciiStr::<16>::from_str(s)
                .unwrap()
                .to_ascii_lowercase();
            s == lowered.as_str()
        };
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 903 | |
/// Alphabetic-titlecase means: all letters, and unchanged by titlecasing.
#[test]
fn test_is_ascii_alphabetic_titlecase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| {
            // Letters only...
            if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                return false;
            }
            // ...and already in title case.
            let titled = TinyAsciiStr::<16>::from_str(s)
                .unwrap()
                .to_ascii_titlecase();
            s == titled.as_str()
        };
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 927 | |
/// Alphabetic-uppercase means: all letters, and unchanged by uppercasing.
#[test]
fn test_is_ascii_alphabetic_uppercase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| {
            // Letters only...
            if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                return false;
            }
            // ...and already in upper case.
            let raised = TinyAsciiStr::<16>::from_str(s)
                .unwrap()
                .to_ascii_uppercase();
            s == raised.as_str()
        };
        let subject = |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t);
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 951 | |
/// `to_ascii_lowercase` must match lowercasing each `char` individually.
#[test]
fn test_to_ascii_lowercase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| -> String {
            s.chars().map(|c| c.to_ascii_lowercase()).collect()
        };
        let subject =
            |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_lowercase(t).as_str().to_owned();
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 971 | |
/// `to_ascii_titlecase` must match "lowercase everything, then uppercase the first byte".
#[test]
fn test_to_ascii_titlecase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| -> String {
            let mut titled: String = s.chars().map(|c| c.to_ascii_lowercase()).collect();
            // Every test string is non-empty ASCII, so byte offset 1 is a
            // valid char boundary and the one-byte prefix is the first character.
            titled[..1].make_ascii_uppercase();
            titled
        };
        let subject =
            |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned();
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 995 | |
/// `to_ascii_uppercase` must match uppercasing each `char` individually.
#[test]
fn test_to_ascii_uppercase() {
    fn verify<const N: usize>() {
        let reference = |s: &str| -> String {
            s.chars().map(|c| c.to_ascii_uppercase()).collect()
        };
        let subject =
            |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_uppercase(t).as_str().to_owned();
        check_operation(reference, subject)
    }
    verify::<2>();
    verify::<3>();
    verify::<4>();
    verify::<5>();
    verify::<8>();
    verify::<16>();
}
| 1015 | |
/// Exercises `from_bytes_lossy`: empty input, embedded and lone NUL bytes,
/// truncation of over-long input, and non-ASCII bytes.
#[test]
fn lossy_constructor() {
    assert_eq!(TinyAsciiStr::<4>::from_bytes_lossy(b"").as_str(), "");
    // An embedded NUL is replaced by '?'. The input is exactly four bytes, so
    // the byte after the NUL survives.
    assert_eq!(
        TinyAsciiStr::<4>::from_bytes_lossy(b"oh\0o").as_str(),
        "oh?o"
    );
    // A lone NUL becomes a lone '?'.
    assert_eq!(TinyAsciiStr::<4>::from_bytes_lossy(b"\0").as_str(), "?");
    // Input longer than the capacity is cut to its first four bytes.
    assert_eq!(
        TinyAsciiStr::<4>::from_bytes_lossy(b"toolong").as_str(),
        "tool"
    );
    // Bytes at or above 0x80 are replaced by '?'.
    assert_eq!(
        TinyAsciiStr::<4>::from_bytes_lossy(&[b'a', 0x80, 0xFF, b'1']).as_str(),
        "a??1"
    );
}
| 1033 | } |
| 1034 | |