1 | // This file is part of ICU4X. For terms of use, please see the file |
---|---|
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use crate::asciibyte::AsciiByte; |
6 | use crate::int_ops::{Aligned4, Aligned8}; |
7 | use crate::TinyStrError; |
8 | use core::fmt; |
9 | use core::ops::Deref; |
10 | use core::str::{self, FromStr}; |
11 | |
12 | #[repr(transparent)] |
13 | #[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)] |
14 | pub struct TinyAsciiStr<const N: usize> { |
15 | bytes: [AsciiByte; N], |
16 | } |
17 | |
18 | impl<const N: usize> TinyAsciiStr<N> { |
19 | /// Creates a `TinyAsciiStr<N>` from the given byte slice. |
20 | /// `bytes` may contain at most `N` non-null ASCII bytes. |
21 | pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> { |
22 | Self::from_bytes_inner(bytes, 0, bytes.len(), false) |
23 | } |
24 | |
25 | /// Creates a `TinyAsciiStr<N>` from a byte slice, replacing invalid bytes. |
26 | /// |
27 | /// Null and non-ASCII bytes (i.e. those outside the range `0x01..=0x7F`) |
28 | /// will be replaced with the '?' character. |
29 | /// |
30 | /// The input slice will be truncated if its length exceeds `N`. |
31 | pub const fn from_bytes_lossy(bytes: &[u8]) -> Self { |
32 | const QUESTION: u8 = b'?'; |
33 | let mut out = [0; N]; |
34 | let mut i = 0; |
35 | // Ord is not available in const, so no `.min(N)` |
36 | let len = if bytes.len() > N { N } else { bytes.len() }; |
37 | |
38 | // Indexing is protected by the len check above |
39 | #[allow(clippy::indexing_slicing)] |
40 | while i < len { |
41 | let b = bytes[i]; |
42 | if b > 0 && b < 0x80 { |
43 | out[i] = b; |
44 | } else { |
45 | out[i] = QUESTION; |
46 | } |
47 | i += 1; |
48 | } |
49 | |
50 | Self { |
51 | // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` |
52 | bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, |
53 | } |
54 | } |
55 | |
56 | /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`. |
57 | /// |
58 | /// The byte array may contain trailing NUL bytes. |
59 | /// |
60 | /// # Example |
61 | /// |
62 | /// ``` |
63 | /// use tinystr::tinystr; |
64 | /// use tinystr::TinyAsciiStr; |
65 | /// |
66 | /// assert_eq!( |
67 | /// TinyAsciiStr::<3>::try_from_raw(*b"GB\0 "), |
68 | /// Ok(tinystr!(3, "GB")) |
69 | /// ); |
70 | /// assert_eq!( |
71 | /// TinyAsciiStr::<3>::try_from_raw(*b"USD"), |
72 | /// Ok(tinystr!(3, "USD")) |
73 | /// ); |
74 | /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0 A\0 "), Err(_))); |
75 | /// ``` |
76 | pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> { |
77 | Self::from_bytes_inner(&raw, 0, N, true) |
78 | } |
79 | |
80 | /// Equivalent to [`from_bytes(bytes[start..end])`](Self::from_bytes), |
81 | /// but callable in a `const` context (which range indexing is not). |
82 | pub const fn from_bytes_manual_slice( |
83 | bytes: &[u8], |
84 | start: usize, |
85 | end: usize, |
86 | ) -> Result<Self, TinyStrError> { |
87 | Self::from_bytes_inner(bytes, start, end, false) |
88 | } |
89 | |
90 | #[inline] |
91 | pub(crate) const fn from_bytes_inner( |
92 | bytes: &[u8], |
93 | start: usize, |
94 | end: usize, |
95 | allow_trailing_null: bool, |
96 | ) -> Result<Self, TinyStrError> { |
97 | let len = end - start; |
98 | if len > N { |
99 | return Err(TinyStrError::TooLarge { max: N, len }); |
100 | } |
101 | |
102 | let mut out = [0; N]; |
103 | let mut i = 0; |
104 | let mut found_null = false; |
105 | // Indexing is protected by TinyStrError::TooLarge |
106 | #[allow(clippy::indexing_slicing)] |
107 | while i < len { |
108 | let b = bytes[start + i]; |
109 | |
110 | if b == 0 { |
111 | found_null = true; |
112 | } else if b >= 0x80 { |
113 | return Err(TinyStrError::NonAscii); |
114 | } else if found_null { |
115 | // Error if there are contentful bytes after null |
116 | return Err(TinyStrError::ContainsNull); |
117 | } |
118 | out[i] = b; |
119 | |
120 | i += 1; |
121 | } |
122 | |
123 | if !allow_trailing_null && found_null { |
124 | // We found some trailing nulls, error |
125 | return Err(TinyStrError::ContainsNull); |
126 | } |
127 | |
128 | Ok(Self { |
129 | // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` |
130 | bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, |
131 | }) |
132 | } |
133 | |
134 | // TODO: This function shadows the FromStr trait. Rename? |
135 | #[inline] |
136 | pub const fn from_str(s: &str) -> Result<Self, TinyStrError> { |
137 | Self::from_bytes_inner(s.as_bytes(), 0, s.len(), false) |
138 | } |
139 | |
140 | #[inline] |
141 | pub const fn as_str(&self) -> &str { |
142 | // as_bytes is valid utf8 |
143 | unsafe { str::from_utf8_unchecked(self.as_bytes()) } |
144 | } |
145 | |
146 | #[inline] |
147 | #[must_use] |
148 | pub const fn len(&self) -> usize { |
149 | if N <= 4 { |
150 | Aligned4::from_ascii_bytes(&self.bytes).len() |
151 | } else if N <= 8 { |
152 | Aligned8::from_ascii_bytes(&self.bytes).len() |
153 | } else { |
154 | let mut i = 0; |
155 | #[allow(clippy::indexing_slicing)] // < N is safe |
156 | while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 { |
157 | i += 1 |
158 | } |
159 | i |
160 | } |
161 | } |
162 | |
163 | #[inline] |
164 | #[must_use] |
165 | pub const fn is_empty(&self) -> bool { |
166 | self.bytes[0] as u8 == AsciiByte::B0 as u8 |
167 | } |
168 | |
169 | #[inline] |
170 | #[must_use] |
171 | pub const fn as_bytes(&self) -> &[u8] { |
172 | // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`, |
173 | // and changing the length of that slice to self.len() < N is safe. |
174 | unsafe { |
175 | core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len()) |
176 | } |
177 | } |
178 | |
179 | #[inline] |
180 | #[must_use] |
181 | pub const fn all_bytes(&self) -> &[u8; N] { |
182 | // SAFETY: `self.bytes` has same size as [u8; N] |
183 | unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) } |
184 | } |
185 | |
186 | #[inline] |
187 | #[must_use] |
188 | /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`. |
189 | /// |
190 | /// If `M < len()` the string gets truncated, otherwise only the |
191 | /// memory representation changes. |
192 | pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> { |
193 | let mut bytes = [0; M]; |
194 | let mut i = 0; |
195 | // Indexing is protected by the loop guard |
196 | #[allow(clippy::indexing_slicing)] |
197 | while i < M && i < N { |
198 | bytes[i] = self.bytes[i] as u8; |
199 | i += 1; |
200 | } |
201 | // `self.bytes` only contains ASCII bytes, with no null bytes between |
202 | // ASCII characters, so this also holds for `bytes`. |
203 | unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) } |
204 | } |
205 | |
206 | /// # Safety |
207 | /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes |
208 | /// between ASCII characters |
209 | #[must_use] |
210 | pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self { |
211 | Self { |
212 | bytes: AsciiByte::to_ascii_byte_array(&bytes), |
213 | } |
214 | } |
215 | } |
216 | |
// Expands to the body of an `is_ascii_*` predicate on `TinyAsciiStr<N>`.
//
// Every arm dispatches the same way: for `N <= 4` and `N <= 8` the bytes are
// handed to `Aligned4` / `Aligned8` and `$check_int` is called on the result;
// for larger `N` the bytes are inspected one at a time with `u8` methods,
// stopping at the first NUL byte (or at `N`).
//
// The expansion contains `return false;`, so it has to be used as the tail
// expression of a function returning `bool`.
//
// NOTE(review): in the two four-argument arms the first byte is tested before
// any NUL check, so for `N > 8` an empty string is judged by its NUL byte (the
// last arm then returns `false`). Confirm this agrees with what the
// `Aligned4` / `Aligned8` checks return for an empty string.
macro_rules! check_is {
    // Every content byte must satisfy `$check_u8`.
    ($self:ident, $check_int:ident, $check_u8:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            let mut i = 0;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    // Inverted form, selected by the leading `!` on both predicates: the first
    // byte must NOT satisfy `$check_u8_0_inv` and no later content byte may
    // satisfy `$check_u8_1_inv`.
    ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if ($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if ($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    // Positive form with separate predicates for the first and the later bytes:
    // the first byte must satisfy `$check_u8_0_inv` and every later content byte
    // must satisfy `$check_u8_1_inv`. (Despite the `_inv` suffix in the names,
    // nothing is inverted in this arm.)
    ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if !($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
}
281 | |
282 | impl<const N: usize> TinyAsciiStr<N> { |
/// Checks if the value is composed of ASCII alphabetic characters:
///
///  * U+0041 'A' ..= U+005A 'Z', or
///  * U+0061 'a' ..= U+007A 'z'.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
///
/// assert!(s1.is_ascii_alphabetic());
/// assert!(!s2.is_ascii_alphabetic());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphabetic(&self) -> bool {
    check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
}
304 | |
/// Checks if the value is composed of ASCII alphanumeric characters:
///
///  * U+0041 'A' ..= U+005A 'Z', or
///  * U+0061 'a' ..= U+007A 'z', or
///  * U+0030 '0' ..= U+0039 '9'.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
///
/// assert!(s1.is_ascii_alphanumeric());
/// assert!(!s2.is_ascii_alphanumeric());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphanumeric(&self) -> bool {
    check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
}
327 | |
/// Checks if the value is composed of ASCII decimal digits:
///
///  * U+0030 '0' ..= U+0039 '9'.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
///
/// assert!(s1.is_ascii_numeric());
/// assert!(!s2.is_ascii_numeric());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_numeric(&self) -> bool {
    check_is!(self, is_ascii_numeric, is_ascii_digit)
}
348 | |
/// Checks if the value is in ASCII lower case.
///
/// All letter characters are checked for case. Non-letter characters are ignored,
/// so the check passes as long as the value contains no ASCII uppercase letter.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_lowercase());
/// assert!(s2.is_ascii_lowercase());
/// assert!(s3.is_ascii_lowercase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_lowercase(&self) -> bool {
    check_is!(
        self,
        is_ascii_lowercase,
        !is_ascii_uppercase,
        !is_ascii_uppercase
    )
}
376 | |
/// Checks if the value is in ASCII title case.
///
/// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
/// Non-letter characters are ignored: only a lowercase letter in first position
/// or an uppercase letter in a later position makes the check fail.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_titlecase());
/// assert!(s2.is_ascii_titlecase());
/// assert!(s3.is_ascii_titlecase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_titlecase(&self) -> bool {
    check_is!(
        self,
        is_ascii_titlecase,
        !is_ascii_lowercase,
        !is_ascii_uppercase
    )
}
405 | |
/// Checks if the value is in ASCII upper case.
///
/// All letter characters are checked for case. Non-letter characters are ignored,
/// so the check passes as long as the value contains no ASCII lowercase letter.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_uppercase());
/// assert!(s2.is_ascii_uppercase());
/// assert!(!s3.is_ascii_uppercase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_uppercase(&self) -> bool {
    check_is!(
        self,
        is_ascii_uppercase,
        !is_ascii_lowercase,
        !is_ascii_lowercase
    )
}
433 | |
/// Checks if the value is composed of ASCII alphabetic lower case characters:
///
///  * U+0061 'a' ..= U+007A 'z',
///
/// Unlike [`Self::is_ascii_lowercase`], non-letter characters make this check fail.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
/// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_alphabetic_lowercase());
/// assert!(!s2.is_ascii_alphabetic_lowercase());
/// assert!(!s3.is_ascii_alphabetic_lowercase());
/// assert!(s4.is_ascii_alphabetic_lowercase());
/// assert!(!s5.is_ascii_alphabetic_lowercase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
    check_is!(
        self,
        is_ascii_alphabetic_lowercase,
        is_ascii_lowercase,
        is_ascii_lowercase
    )
}
465 | |
/// Checks if the value is composed of ASCII alphabetic characters, with the first
/// character being ASCII uppercase and all others ASCII lowercase.
///
/// Unlike [`Self::is_ascii_titlecase`], non-letter characters make this check fail.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
/// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(s1.is_ascii_alphabetic_titlecase());
/// assert!(!s2.is_ascii_alphabetic_titlecase());
/// assert!(!s3.is_ascii_alphabetic_titlecase());
/// assert!(!s4.is_ascii_alphabetic_titlecase());
/// assert!(!s5.is_ascii_alphabetic_titlecase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
    check_is!(
        self,
        is_ascii_alphabetic_titlecase,
        is_ascii_uppercase,
        is_ascii_lowercase
    )
}
495 | |
/// Checks if the value is composed of ASCII alphabetic upper case characters:
///
///  * U+0041 'A' ..= U+005A 'Z',
///
/// Unlike [`Self::is_ascii_uppercase`], non-letter characters make this check fail.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
/// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
/// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
/// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
/// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
///
/// assert!(!s1.is_ascii_alphabetic_uppercase());
/// assert!(!s2.is_ascii_alphabetic_uppercase());
/// assert!(!s3.is_ascii_alphabetic_uppercase());
/// assert!(s4.is_ascii_alphabetic_uppercase());
/// assert!(!s5.is_ascii_alphabetic_uppercase());
/// ```
#[inline]
#[must_use]
pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
    check_is!(
        self,
        is_ascii_alphabetic_uppercase,
        is_ascii_uppercase,
        is_ascii_uppercase
    )
}
527 | } |
528 | |
// Expands to the body of a `to_ascii_*` case conversion on `TinyAsciiStr<N>`.
//
// * `$self` must be a mutable binding: its `bytes` are overwritten and the
//   binding itself is the value of the expansion.
// * `$to` is the conversion method called on `Aligned4` / `Aligned8` when
//   `N <= 4` / `N <= 8`.
// * `$later_char_to` is the `u8` method applied to every content byte when `N > 8`.
// * `$first_char_to` (optional) is a `u8` method applied to byte 0 afterwards,
//   on top of the result of `$later_char_to`; it is only used when `N > 8`.
macro_rules! to {
    ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
        let mut i = 0;
        if N <= 4 {
            let aligned = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and aligned has length >= N
            #[allow(clippy::indexing_slicing)]
            while i < N {
                $self.bytes[i] = aligned[i];
                i += 1;
            }
        } else if N <= 8 {
            let aligned = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and aligned has length >= N
            #[allow(clippy::indexing_slicing)]
            while i < N {
                $self.bytes[i] = aligned[i];
                i += 1;
            }
        } else {
            // Convert byte by byte, stopping at the first NUL (padding starts there).
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                unsafe {
                    $self.bytes[i] = core::mem::transmute::<u8, AsciiByte>(
                        ($self.bytes[i] as u8).$later_char_to()
                    );
                }
                i += 1;
            }
            // Index 0 exists here because this branch is only taken when N > 8.
            // SAFETY: AsciiByte is repr(u8) and has same size as u8
            $(
                $self.bytes[0] = unsafe {
                    core::mem::transmute::<u8, AsciiByte>(($self.bytes[0] as u8).$first_char_to())
                };
            )?
        }
        $self
    }};
}
570 | |
571 | impl<const N: usize> TinyAsciiStr<N> { |
/// Returns the ASCII lower case equivalent of this value.
///
/// The value is taken by copy and the converted copy is returned; the original
/// is left untouched.
///
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
///
/// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
/// ```
#[inline]
#[must_use]
pub const fn to_ascii_lowercase(mut self) -> Self {
    to!(self, to_ascii_lowercase, to_ascii_lowercase)
}
590 | |
/// Returns the ASCII title case equivalent of this value.
///
/// The value is taken by copy and the converted copy is returned; the original
/// is left untouched.
///
/// The first character is converted to ASCII uppercase; the remaining characters
/// are converted to ASCII lowercase.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
///
/// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
/// ```
#[inline]
#[must_use]
pub const fn to_ascii_titlecase(mut self) -> Self {
    to!(
        self,
        to_ascii_titlecase,
        to_ascii_lowercase,
        to_ascii_uppercase
    )
}
615 | |
/// Returns the ASCII upper case equivalent of this value.
///
/// The value is taken by copy and the converted copy is returned; the original
/// is left untouched.
///
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
///
/// # Examples
///
/// ```
/// use tinystr::TinyAsciiStr;
///
/// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
///
/// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
/// ```
#[inline]
#[must_use]
pub const fn to_ascii_uppercase(mut self) -> Self {
    to!(self, to_ascii_uppercase, to_ascii_uppercase)
}
634 | } |
635 | |
636 | impl<const N: usize> fmt::Debug for TinyAsciiStr<N> { |
637 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
638 | fmt::Debug::fmt(self.as_str(), f) |
639 | } |
640 | } |
641 | |
642 | impl<const N: usize> fmt::Display for TinyAsciiStr<N> { |
643 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
644 | fmt::Display::fmt(self.as_str(), f) |
645 | } |
646 | } |
647 | |
648 | impl<const N: usize> Deref for TinyAsciiStr<N> { |
649 | type Target = str; |
650 | #[inline] |
651 | fn deref(&self) -> &str { |
652 | self.as_str() |
653 | } |
654 | } |
655 | |
656 | impl<const N: usize> FromStr for TinyAsciiStr<N> { |
657 | type Err = TinyStrError; |
658 | #[inline] |
659 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
660 | Self::from_str(s) |
661 | } |
662 | } |
663 | |
664 | impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> { |
665 | fn eq(&self, other: &str) -> bool { |
666 | self.deref() == other |
667 | } |
668 | } |
669 | |
670 | impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> { |
671 | fn eq(&self, other: &&str) -> bool { |
672 | self.deref() == *other |
673 | } |
674 | } |
675 | |
// Compares the string content against an owned `String` (needs the `alloc` feature).
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        self.as_str() == other.as_str()
    }
}
682 | |
// Mirror image of the impl above, so that `String == TinyAsciiStr` also works.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        self.as_str() == other.as_str()
    }
}
689 | |
690 | #[cfg(test)] |
691 | mod test { |
692 | use super::*; |
693 | use rand::distributions::Distribution; |
694 | use rand::distributions::Standard; |
695 | use rand::rngs::SmallRng; |
696 | use rand::seq::SliceRandom; |
697 | use rand::SeedableRng; |
698 | |
// Hand-picked inputs fed to every `check_operation` run, in addition to the
// randomly generated ones. They cover lengths from 2 to 16, mixed case,
// digits in leading and trailing positions, and punctuation ("@@[`{").
const STRINGS: [&str; 26] = [
    "Latn",
    "laTn",
    "windows",
    "AR",
    "Hans",
    "macos",
    "AT",
    "infiniband",
    "FR",
    "en",
    "Cyrl",
    "FromIntegral",
    "NO",
    "419",
    "MacintoshOSX2019",
    "a3z",
    "A3z",
    "A3Z",
    "a3Z",
    "3A",
    "3Z",
    "3a",
    "3z",
    "@@[`{",
    "UK",
    "E12",
];
727 | |
728 | fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> { |
729 | let mut rng = SmallRng::seed_from_u64(2022); |
730 | // Need to do this in 2 steps since the RNG is needed twice |
731 | let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap()) |
732 | .take(num_strings) |
733 | .collect::<Vec<usize>>(); |
734 | string_lengths |
735 | .iter() |
736 | .map(|len| { |
737 | Standard |
738 | .sample_iter(&mut rng) |
739 | .filter(|b: &u8| *b > 0 && *b < 0x80) |
740 | .take(*len) |
741 | .collect::<Vec<u8>>() |
742 | }) |
743 | .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII")) |
744 | .collect() |
745 | } |
746 | |
747 | fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2) |
748 | where |
749 | F1: Fn(&str) -> T, |
750 | F2: Fn(TinyAsciiStr<N>) -> T, |
751 | T: core::fmt::Debug + core::cmp::PartialEq, |
752 | { |
753 | for s in STRINGS |
754 | .into_iter() |
755 | .map(str::to_owned) |
756 | .chain(gen_strings(100, &[3, 4, 5, 8, 12])) |
757 | { |
758 | let t = match TinyAsciiStr::<N>::from_str(&s) { |
759 | Ok(t) => t, |
760 | Err(TinyStrError::TooLarge { .. }) => continue, |
761 | Err(e) => panic!("{}", e), |
762 | }; |
763 | let expected = reference_f(&s); |
764 | let actual = tinystr_f(t); |
765 | assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}"); |
766 | } |
767 | } |
768 | |
#[test]
fn test_is_ascii_alphabetic() {
    // Reference: a per-byte check on the plain string.
    fn run<const N: usize>() {
        check_operation(
            |s| s.bytes().all(|b| b.is_ascii_alphabetic()),
            |t: TinyAsciiStr<N>| t.is_ascii_alphabetic(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
784 | |
#[test]
fn test_is_ascii_alphanumeric() {
    // Reference: a per-byte check on the plain string.
    fn run<const N: usize>() {
        check_operation(
            |s| s.bytes().all(|b| b.is_ascii_alphanumeric()),
            |t: TinyAsciiStr<N>| t.is_ascii_alphanumeric(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
800 | |
#[test]
fn test_is_ascii_numeric() {
    // Reference: a per-byte digit check on the plain string.
    fn run<const N: usize>() {
        check_operation(
            |s| s.bytes().all(|b| b.is_ascii_digit()),
            |t: TinyAsciiStr<N>| t.is_ascii_numeric(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
816 | |
#[test]
fn test_is_ascii_lowercase() {
    // Reference: a string is lowercase when lowercasing it changes nothing.
    fn run<const N: usize>() {
        check_operation(
            |s| {
                let lowered = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_lowercase();
                lowered.as_str() == s
            },
            |t: TinyAsciiStr<N>| t.is_ascii_lowercase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
837 | |
#[test]
fn test_is_ascii_titlecase() {
    // Reference: a string is titlecase when titlecasing it changes nothing.
    fn run<const N: usize>() {
        check_operation(
            |s| {
                let titled = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_titlecase();
                titled.as_str() == s
            },
            |t: TinyAsciiStr<N>| t.is_ascii_titlecase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
858 | |
#[test]
fn test_is_ascii_uppercase() {
    // Reference: a string is uppercase when uppercasing it changes nothing.
    fn run<const N: usize>() {
        check_operation(
            |s| {
                let raised = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_uppercase();
                raised.as_str() == s
            },
            |t: TinyAsciiStr<N>| t.is_ascii_uppercase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
879 | |
#[test]
fn test_is_ascii_alphabetic_lowercase() {
    // Reference: all letters, and lowercasing changes nothing.
    fn run<const N: usize>() {
        check_operation(
            |s| {
                // Check alphabetic
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                // Check lowercase
                let lowered = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_lowercase();
                lowered.as_str() == s
            },
            |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_lowercase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
903 | |
#[test]
fn test_is_ascii_alphabetic_titlecase() {
    // Reference: all letters, and titlecasing changes nothing.
    fn run<const N: usize>() {
        check_operation(
            |s| {
                // Check alphabetic
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                // Check titlecase
                let titled = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_titlecase();
                titled.as_str() == s
            },
            |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_titlecase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
927 | |
#[test]
fn test_is_ascii_alphabetic_uppercase() {
    // Reference: all letters, and uppercasing changes nothing.
    fn run<const N: usize>() {
        check_operation(
            |s| {
                // Check alphabetic
                if !s.bytes().all(|b| b.is_ascii_alphabetic()) {
                    return false;
                }
                // Check uppercase
                let raised = TinyAsciiStr::<16>::from_str(s)
                    .unwrap()
                    .to_ascii_uppercase();
                raised.as_str() == s
            },
            |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_uppercase(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
951 | |
#[test]
fn test_to_ascii_lowercase() {
    // Reference: the standard library's ASCII lowercasing of the plain string.
    fn run<const N: usize>() {
        check_operation(
            |s| s.to_ascii_lowercase(),
            |t: TinyAsciiStr<N>| t.to_ascii_lowercase().as_str().to_owned(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
971 | |
#[test]
fn test_to_ascii_titlecase() {
    // Reference: lowercase the whole string, then uppercase its first character.
    fn check<const N: usize>() {
        check_operation(
            |s| {
                let mut r = s.to_ascii_lowercase();
                // No `unsafe` needed: the test inputs are non-empty ASCII, so
                // `..1` is a valid char boundary and `str::make_ascii_uppercase`
                // can rewrite the first character in place.
                r[..1].make_ascii_uppercase();
                r
            },
            |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(),
        )
    }
    check::<2>();
    check::<3>();
    check::<4>();
    check::<5>();
    check::<8>();
    check::<16>();
}
995 | |
#[test]
fn test_to_ascii_uppercase() {
    // Reference: the standard library's ASCII uppercasing of the plain string.
    fn run<const N: usize>() {
        check_operation(
            |s| s.to_ascii_uppercase(),
            |t: TinyAsciiStr<N>| t.to_ascii_uppercase().as_str().to_owned(),
        )
    }
    run::<2>();
    run::<3>();
    run::<4>();
    run::<5>();
    run::<8>();
    run::<16>();
}
1015 | |
#[test]
fn lossy_constructor() {
    // Empty input stays empty.
    assert_eq!(TinyAsciiStr::<4>::from_bytes_lossy(b"").as_str(), "");
    // An interior NUL is replaced with '?'.
    assert_eq!(
        TinyAsciiStr::<4>::from_bytes_lossy(b"oh\0o").as_str(),
        "oh?o"
    );
    // A lone NUL is replaced as well.
    assert_eq!(TinyAsciiStr::<4>::from_bytes_lossy(b"\0").as_str(), "?");
    // Over-long input is truncated to the capacity.
    assert_eq!(
        TinyAsciiStr::<4>::from_bytes_lossy(b"toolong").as_str(),
        "tool"
    );
    // Non-ASCII bytes are replaced with '?'.
    assert_eq!(
        TinyAsciiStr::<4>::from_bytes_lossy(&[b'a', 0x80, 0xFF, b'1']).as_str(),
        "a??1"
    );
}
1033 | } |
1034 |
Definitions
- TinyAsciiStr
- bytes
- from_bytes
- from_bytes_lossy
- try_from_raw
- from_bytes_manual_slice
- from_bytes_inner
- from_str
- as_str
- len
- is_empty
- as_bytes
- all_bytes
- resize
- from_bytes_unchecked
- check_is
- is_ascii_alphabetic
- is_ascii_alphanumeric
- is_ascii_numeric
- is_ascii_lowercase
- is_ascii_titlecase
- is_ascii_uppercase
- is_ascii_alphabetic_lowercase
- is_ascii_alphabetic_titlecase
- is_ascii_alphabetic_uppercase
- to
- to_ascii_lowercase
- to_ascii_titlecase
- to_ascii_uppercase
- fmt
- fmt
- Target
- deref
- Err
- from_str
- eq
- eq
- eq
Learn Rust with the experts
Find out more