1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use crate::asciibyte::AsciiByte; |
6 | use crate::int_ops::{Aligned4, Aligned8}; |
7 | use crate::TinyStrError; |
8 | use core::fmt; |
9 | use core::ops::Deref; |
10 | use core::str::{self, FromStr}; |
11 | |
/// A fixed-capacity ASCII string stored inline as an array of `N` bytes.
///
/// No separate length is stored: the constructors in this file zero-fill the
/// array before copying the content in, so the bytes after the end of the
/// string are NUL.
///
/// Equality, ordering and hashing are derived from the underlying byte array.
#[repr(transparent)]
#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
pub struct TinyAsciiStr<const N: usize> {
    // Content bytes followed by NUL padding up to `N`.
    bytes: [AsciiByte; N],
}
17 | |
18 | impl<const N: usize> TinyAsciiStr<N> { |
19 | /// Creates a `TinyAsciiStr<N>` from the given byte slice. |
20 | /// `bytes` may contain at most `N` non-null ASCII bytes. |
21 | pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> { |
22 | Self::from_bytes_inner(bytes, 0, bytes.len(), false) |
23 | } |
24 | |
25 | /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`. |
26 | /// |
27 | /// The byte array may contain trailing NUL bytes. |
28 | /// |
29 | /// # Example |
30 | /// |
31 | /// ``` |
32 | /// use tinystr::tinystr; |
33 | /// use tinystr::TinyAsciiStr; |
34 | /// |
35 | /// assert_eq!( |
36 | /// TinyAsciiStr::<3>::try_from_raw(*b"GB \0" ), |
37 | /// Ok(tinystr!(3, "GB" )) |
38 | /// ); |
39 | /// assert_eq!( |
40 | /// TinyAsciiStr::<3>::try_from_raw(*b"USD" ), |
41 | /// Ok(tinystr!(3, "USD" )) |
42 | /// ); |
43 | /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b" \0A \0" ), Err(_))); |
44 | /// ``` |
45 | pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> { |
46 | Self::from_bytes_inner(&raw, 0, N, true) |
47 | } |
48 | |
49 | /// Equivalent to [`from_bytes(bytes[start..end])`](Self::from_bytes), |
50 | /// but callable in a `const` context (which range indexing is not). |
51 | pub const fn from_bytes_manual_slice( |
52 | bytes: &[u8], |
53 | start: usize, |
54 | end: usize, |
55 | ) -> Result<Self, TinyStrError> { |
56 | Self::from_bytes_inner(bytes, start, end, false) |
57 | } |
58 | |
59 | #[inline ] |
60 | pub(crate) const fn from_bytes_inner( |
61 | bytes: &[u8], |
62 | start: usize, |
63 | end: usize, |
64 | allow_trailing_null: bool, |
65 | ) -> Result<Self, TinyStrError> { |
66 | let len = end - start; |
67 | if len > N { |
68 | return Err(TinyStrError::TooLarge { max: N, len }); |
69 | } |
70 | |
71 | let mut out = [0; N]; |
72 | let mut i = 0; |
73 | let mut found_null = false; |
74 | // Indexing is protected by TinyStrError::TooLarge |
75 | #[allow (clippy::indexing_slicing)] |
76 | while i < len { |
77 | let b = bytes[start + i]; |
78 | |
79 | if b == 0 { |
80 | found_null = true; |
81 | } else if b >= 0x80 { |
82 | return Err(TinyStrError::NonAscii); |
83 | } else if found_null { |
84 | // Error if there are contentful bytes after null |
85 | return Err(TinyStrError::ContainsNull); |
86 | } |
87 | out[i] = b; |
88 | |
89 | i += 1; |
90 | } |
91 | |
92 | if !allow_trailing_null && found_null { |
93 | // We found some trailing nulls, error |
94 | return Err(TinyStrError::ContainsNull); |
95 | } |
96 | |
97 | Ok(Self { |
98 | // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` |
99 | bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, |
100 | }) |
101 | } |
102 | |
103 | // TODO: This function shadows the FromStr trait. Rename? |
104 | #[inline ] |
105 | pub const fn from_str(s: &str) -> Result<Self, TinyStrError> { |
106 | Self::from_bytes_inner(s.as_bytes(), 0, s.len(), false) |
107 | } |
108 | |
109 | #[inline ] |
110 | pub const fn as_str(&self) -> &str { |
111 | // as_bytes is valid utf8 |
112 | unsafe { str::from_utf8_unchecked(self.as_bytes()) } |
113 | } |
114 | |
115 | #[inline ] |
116 | #[must_use ] |
117 | pub const fn len(&self) -> usize { |
118 | if N <= 4 { |
119 | Aligned4::from_ascii_bytes(&self.bytes).len() |
120 | } else if N <= 8 { |
121 | Aligned8::from_ascii_bytes(&self.bytes).len() |
122 | } else { |
123 | let mut i = 0; |
124 | #[allow (clippy::indexing_slicing)] // < N is safe |
125 | while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 { |
126 | i += 1 |
127 | } |
128 | i |
129 | } |
130 | } |
131 | |
132 | #[inline ] |
133 | #[must_use ] |
134 | pub const fn is_empty(&self) -> bool { |
135 | self.bytes[0] as u8 == AsciiByte::B0 as u8 |
136 | } |
137 | |
138 | #[inline ] |
139 | #[must_use ] |
140 | pub const fn as_bytes(&self) -> &[u8] { |
141 | // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`, |
142 | // and changing the length of that slice to self.len() < N is safe. |
143 | unsafe { |
144 | core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len()) |
145 | } |
146 | } |
147 | |
148 | #[inline ] |
149 | #[must_use ] |
150 | pub const fn all_bytes(&self) -> &[u8; N] { |
151 | // SAFETY: `self.bytes` has same size as [u8; N] |
152 | unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) } |
153 | } |
154 | |
155 | #[inline ] |
156 | #[must_use ] |
157 | /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`. |
158 | /// |
159 | /// If `M < len()` the string gets truncated, otherwise only the |
160 | /// memory representation changes. |
161 | pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> { |
162 | let mut bytes = [0; M]; |
163 | let mut i = 0; |
164 | // Indexing is protected by the loop guard |
165 | #[allow (clippy::indexing_slicing)] |
166 | while i < M && i < N { |
167 | bytes[i] = self.bytes[i] as u8; |
168 | i += 1; |
169 | } |
170 | // `self.bytes` only contains ASCII bytes, with no null bytes between |
171 | // ASCII characters, so this also holds for `bytes`. |
172 | unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) } |
173 | } |
174 | |
175 | /// # Safety |
176 | /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes |
177 | /// between ASCII characters |
178 | #[must_use ] |
179 | pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self { |
180 | Self { |
181 | bytes: AsciiByte::to_ascii_byte_array(&bytes), |
182 | } |
183 | } |
184 | } |
185 | |
// Expands to the body of an `is_ascii_*` predicate on `TinyAsciiStr<N>`.
//
// Every arm dispatches on the capacity `N`: capacities up to 4 and up to 8
// call `$check_int` on `Aligned4` / `Aligned8`; larger capacities walk the
// bytes in front of the first NUL and test them one by one with `u8` methods.
//
// Arms:
// * 3 arguments: every byte must satisfy `$check_u8`.
// * 4 arguments with `!` markers: the first byte must NOT satisfy the first
//   `u8` check and no later byte may satisfy the second one.
// * 4 arguments without markers: the first byte must satisfy the first `u8`
//   check and every later byte the second one.
//
// NOTE(review): on the `N > 8` path the 4-argument arms test `bytes[0]` even
// when it is NUL, so the arm without markers returns `false` for an empty
// string. Confirm this matches what the `Aligned4` / `Aligned8` checks return.
macro_rules! check_is {
    ($self:ident, $check_int:ident, $check_u8:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            let mut i = 0;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                // Bail out on the first byte that fails the check.
                if !($self.bytes[i] as u8).$check_u8() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if ($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            // The first byte was handled above; continue with the second one.
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if ($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if !($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            // The first byte was handled above; continue with the second one.
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
}
250 | |
// ASCII classification predicates. Each body is generated by `check_is!`,
// which uses the `Aligned4` / `Aligned8` helpers for capacities up to 8 and a
// byte-by-byte scan for larger capacities.
impl<const N: usize> TinyAsciiStr<N> {
    /// Checks if the value is composed of ASCII alphabetic characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z', or
    ///  * U+0061 'a' ..= U+007A 'z'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphabetic());
    /// assert!(!s2.is_ascii_alphabetic());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic(&self) -> bool {
        check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
    }

    /// Checks if the value is composed of ASCII alphanumeric characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z', or
    ///  * U+0061 'a' ..= U+007A 'z', or
    ///  * U+0030 '0' ..= U+0039 '9'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphanumeric());
    /// assert!(!s2.is_ascii_alphanumeric());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphanumeric(&self) -> bool {
        check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
    }

    /// Checks if the value is composed of ASCII decimal digits:
    ///
    ///  * U+0030 '0' ..= U+0039 '9'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_numeric());
    /// assert!(!s2.is_ascii_numeric());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_numeric(&self) -> bool {
        check_is!(self, is_ascii_numeric, is_ascii_digit)
    }

    /// Checks if the value is in ASCII lower case.
    ///
    /// All letter characters are checked for case. Non-letter characters are ignored,
    /// so a value without any ASCII uppercase letter passes.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_lowercase());
    /// assert!(s2.is_ascii_lowercase());
    /// assert!(s3.is_ascii_lowercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_lowercase(&self) -> bool {
        check_is!(
            self,
            is_ascii_lowercase,
            !is_ascii_uppercase,
            !is_ascii_uppercase
        )
    }

    /// Checks if the value is in ASCII title case.
    ///
    /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
    /// Non-letter characters are ignored: the first character only has to not be
    /// lowercase, and the remaining ones only have to not be uppercase.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_titlecase());
    /// assert!(s2.is_ascii_titlecase());
    /// assert!(s3.is_ascii_titlecase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_titlecase(&self) -> bool {
        check_is!(
            self,
            is_ascii_titlecase,
            !is_ascii_lowercase,
            !is_ascii_uppercase
        )
    }

    /// Checks if the value is in ASCII upper case.
    ///
    /// All letter characters are checked for case. Non-letter characters are ignored,
    /// so a value without any ASCII lowercase letter passes.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_uppercase());
    /// assert!(s2.is_ascii_uppercase());
    /// assert!(!s3.is_ascii_uppercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_uppercase(&self) -> bool {
        check_is!(
            self,
            is_ascii_uppercase,
            !is_ascii_lowercase,
            !is_ascii_lowercase
        )
    }

    /// Checks if the value is composed of ASCII alphabetic lower case characters:
    ///
    ///  * U+0061 'a' ..= U+007A 'z',
    ///
    /// Unlike [`is_ascii_lowercase`](Self::is_ascii_lowercase), non-letter
    /// characters make this check fail.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_alphabetic_lowercase());
    /// assert!(!s2.is_ascii_alphabetic_lowercase());
    /// assert!(!s3.is_ascii_alphabetic_lowercase());
    /// assert!(s4.is_ascii_alphabetic_lowercase());
    /// assert!(!s5.is_ascii_alphabetic_lowercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
        check_is!(
            self,
            is_ascii_alphabetic_lowercase,
            is_ascii_lowercase,
            is_ascii_lowercase
        )
    }

    /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase.
    ///
    /// Unlike [`is_ascii_titlecase`](Self::is_ascii_titlecase), non-letter
    /// characters make this check fail.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphabetic_titlecase());
    /// assert!(!s2.is_ascii_alphabetic_titlecase());
    /// assert!(!s3.is_ascii_alphabetic_titlecase());
    /// assert!(!s4.is_ascii_alphabetic_titlecase());
    /// assert!(!s5.is_ascii_alphabetic_titlecase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
        check_is!(
            self,
            is_ascii_alphabetic_titlecase,
            is_ascii_uppercase,
            is_ascii_lowercase
        )
    }

    /// Checks if the value is composed of ASCII alphabetic upper case characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z',
    ///
    /// Unlike [`is_ascii_uppercase`](Self::is_ascii_uppercase), non-letter
    /// characters make this check fail.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_alphabetic_uppercase());
    /// assert!(!s2.is_ascii_alphabetic_uppercase());
    /// assert!(!s3.is_ascii_alphabetic_uppercase());
    /// assert!(s4.is_ascii_alphabetic_uppercase());
    /// assert!(!s5.is_ascii_alphabetic_uppercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
        check_is!(
            self,
            is_ascii_alphabetic_uppercase,
            is_ascii_uppercase,
            is_ascii_uppercase
        )
    }
}
497 | |
// Expands to the body of a `to_ascii_*` case conversion on a by-value
// `mut self` of type `TinyAsciiStr<N>`, and evaluates to the converted `self`.
//
// * `$to` is the method called on `Aligned4` / `Aligned8` for capacities up to 8.
// * `$later_char_to` is the `u8` method applied to every byte in front of the
//   first NUL on the `N > 8` path.
// * `$first_char_to` (optional) is a `u8` method applied to byte 0 afterwards
//   on the `N > 8` path, overriding the result of the loop for that byte.
macro_rules! to {
    ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
        let mut i = 0;
        if N <= 4 {
            let aligned = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and aligned has length >= N
            #[allow(clippy::indexing_slicing)]
            while i < N {
                // Copy the converted bytes back into the N-byte storage.
                $self.bytes[i] = aligned[i];
                i += 1;
            }
        } else if N <= 8 {
            let aligned = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and aligned has length >= N
            #[allow(clippy::indexing_slicing)]
            while i < N {
                // Copy the converted bytes back into the N-byte storage.
                $self.bytes[i] = aligned[i];
                i += 1;
            }
        } else {
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                // SAFETY: AsciiByte is repr(u8) and has same size as u8.
                // The `u8` ASCII case conversions map ASCII bytes to ASCII bytes;
                // presumably every such value is a valid `AsciiByte` (confirm
                // against the `AsciiByte` definition).
                unsafe {
                    $self.bytes[i] = core::mem::transmute(
                        ($self.bytes[i] as u8).$later_char_to()
                    );
                }
                i += 1;
            }
            // SAFETY: AsciiByte is repr(u8) and has same size as u8
            // Indexing byte 0 won't panic because N is > 8 on this path.
            $(
                $self.bytes[0] = unsafe {
                    core::mem::transmute(($self.bytes[0] as u8).$first_char_to())
                };
            )?
        }
        $self
    }};
}
539 | |
// ASCII case conversions. Each method takes `self` by value and returns the
// converted string; the bodies are generated by the `to!` macro.
impl<const N: usize> TinyAsciiStr<N> {
    /// Returns the ASCII lower case equivalent of this value.
    ///
    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
    ///
    /// The method takes `self` by value; since `TinyAsciiStr` is `Copy`, the
    /// value it is called on is left untouched.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_lowercase(mut self) -> Self {
        to!(self, to_ascii_lowercase, to_ascii_lowercase)
    }

    /// Returns the ASCII title case equivalent of this value.
    ///
    /// The first character is converted to ASCII uppercase; the remaining characters
    /// are converted to ASCII lowercase.
    ///
    /// The method takes `self` by value; since `TinyAsciiStr` is `Copy`, the
    /// value it is called on is left untouched.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_titlecase(mut self) -> Self {
        to!(
            self,
            to_ascii_titlecase,
            to_ascii_lowercase,
            to_ascii_uppercase
        )
    }

    /// Returns the ASCII upper case equivalent of this value.
    ///
    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
    ///
    /// The method takes `self` by value; since `TinyAsciiStr` is `Copy`, the
    /// value it is called on is left untouched.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_uppercase(mut self) -> Self {
        to!(self, to_ascii_uppercase, to_ascii_uppercase)
    }
}
604 | |
605 | impl<const N: usize> fmt::Debug for TinyAsciiStr<N> { |
606 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
607 | fmt::Debug::fmt(self.as_str(), f) |
608 | } |
609 | } |
610 | |
611 | impl<const N: usize> fmt::Display for TinyAsciiStr<N> { |
612 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
613 | fmt::Display::fmt(self.as_str(), f) |
614 | } |
615 | } |
616 | |
617 | impl<const N: usize> Deref for TinyAsciiStr<N> { |
618 | type Target = str; |
619 | #[inline ] |
620 | fn deref(&self) -> &str { |
621 | self.as_str() |
622 | } |
623 | } |
624 | |
625 | impl<const N: usize> FromStr for TinyAsciiStr<N> { |
626 | type Err = TinyStrError; |
627 | #[inline ] |
628 | fn from_str(s: &str) -> Result<Self, TinyStrError> { |
629 | Self::from_str(s) |
630 | } |
631 | } |
632 | |
633 | impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> { |
634 | fn eq(&self, other: &str) -> bool { |
635 | self.deref() == other |
636 | } |
637 | } |
638 | |
639 | impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> { |
640 | fn eq(&self, other: &&str) -> bool { |
641 | self.deref() == *other |
642 | } |
643 | } |
644 | |
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    /// Compares the string content (without NUL padding) against an owned `String`.
    fn eq(&self, other: &alloc::string::String) -> bool {
        let lhs: &str = self.as_str();
        let rhs: &str = other.as_str();
        lhs == rhs
    }
}
651 | |
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    /// Compares an owned `String` against the content of a `TinyAsciiStr`.
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        let lhs: &str = self.as_str();
        let rhs: &str = other.as_str();
        lhs == rhs
    }
}
658 | |
#[cfg(test)]
mod test {
    use super::*;
    use rand::distributions::Distribution;
    use rand::distributions::Standard;
    use rand::rngs::SmallRng;
    use rand::seq::SliceRandom;
    use rand::SeedableRng;

    /// Hand-picked inputs covering mixed case, digits, punctuation next to the
    /// letter ranges, and lengths from 2 up to 16 bytes.
    const STRINGS: [&str; 26] = [
        "Latn",
        "laTn",
        "windows",
        "AR",
        "Hans",
        "macos",
        "AT",
        "infiniband",
        "FR",
        "en",
        "Cyrl",
        "FromIntegral",
        "NO",
        "419",
        "MacintoshOSX2019",
        "a3z",
        "A3z",
        "A3Z",
        "a3Z",
        "3A",
        "3Z",
        "3a",
        "3z",
        "@@[`{",
        "UK",
        "E12",
    ];

    /// Generates `num_strings` random strings of non-NUL ASCII bytes, each with
    /// a length drawn from `allowed_lengths`. The RNG seed is fixed, so the
    /// output is the same on every run.
    fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
        let mut rng = SmallRng::seed_from_u64(2022);
        // Need to do this in 2 steps since the RNG is needed twice
        let string_lengths = core::iter::repeat_with(|| {
            *allowed_lengths
                .choose(&mut rng)
                .expect("allowed_lengths must not be empty")
        })
        .take(num_strings)
        .collect::<Vec<usize>>();
        string_lengths
            .iter()
            .map(|len| {
                Standard
                    .sample_iter(&mut rng)
                    .filter(|b: &u8| *b > 0 && *b < 0x80)
                    .take(*len)
                    .collect::<Vec<u8>>()
            })
            .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII"))
            .collect()
    }

    /// Runs `reference_f` on each test string and `tinystr_f` on the same
    /// string parsed as a `TinyAsciiStr<N>`, and asserts that both agree.
    ///
    /// Strings that do not fit into `N` bytes are skipped; any other parse
    /// error fails the test.
    fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
    where
        F1: Fn(&str) -> T,
        F2: Fn(TinyAsciiStr<N>) -> T,
        T: core::fmt::Debug + core::cmp::PartialEq,
    {
        for s in STRINGS
            .into_iter()
            .map(str::to_owned)
            .chain(gen_strings(100, &[3, 4, 5, 8, 12]))
        {
            let t = match TinyAsciiStr::<N>::from_str(&s) {
                Ok(t) => t,
                Err(TinyStrError::TooLarge { .. }) => continue,
                Err(e) => panic!("{}", e),
            };
            let expected = reference_f(&s);
            let actual = tinystr_f(t);
            assert_eq!(expected, actual, "TinyAsciiStr< {N}>: {s:?}");
        }
    }

    /// Calls the given `fn check<const N: usize>()` once for every capacity
    /// exercised by these tests. The list covers each implementation path:
    /// `N <= 4`, `N <= 8` and `N > 8`.
    macro_rules! check_all_sizes {
        ($check:ident) => {
            $check::<2>();
            $check::<3>();
            $check::<4>();
            $check::<5>();
            $check::<8>();
            $check::<16>();
        };
    }

    #[test]
    fn test_is_ascii_alphabetic() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_alphabetic()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_alphanumeric() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_alphanumeric()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphanumeric(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_numeric() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_digit()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_numeric(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                // Reference: the string is unchanged by lowercasing.
                |s| {
                    s == TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_lowercase()
                        .as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_lowercase(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                // Reference: the string is unchanged by titlecasing.
                |s| {
                    s == TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_titlecase()
                        .as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_titlecase(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                // Reference: the string is unchanged by uppercasing.
                |s| {
                    s == TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_uppercase()
                        .as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_uppercase(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    s.chars().all(|c| c.is_ascii_alphabetic()) &&
                    // Check lowercase
                    s == TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_lowercase()
                        .as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    s.chars().all(|c| c.is_ascii_alphabetic()) &&
                    // Check titlecase
                    s == TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_titlecase()
                        .as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    s.chars().all(|c| c.is_ascii_alphabetic()) &&
                    // Check uppercase
                    s == TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_uppercase()
                        .as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_to_ascii_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    s.chars()
                        .map(|c| c.to_ascii_lowercase())
                        .collect::<String>()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_lowercase(t).as_str().to_owned(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_to_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let mut r = s
                        .chars()
                        .map(|c| c.to_ascii_lowercase())
                        .collect::<String>();
                    // Upper-case only the first byte. The string is ASCII, so
                    // `0..1` is a valid char boundary; `get_mut` keeps this
                    // safe (a no-op) even if the string were empty.
                    if let Some(first) = r.get_mut(0..1) {
                        first.make_ascii_uppercase();
                    }
                    r
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_to_ascii_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    s.chars()
                        .map(|c| c.to_ascii_uppercase())
                        .collect::<String>()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_uppercase(t).as_str().to_owned(),
            )
        }
        check_all_sizes!(check);
    }
}
985 | |