/// A simple and not the most-efficient fixed size string on the stack.
///
/// This supplanted some uses of `Box<str>` for storing tiny strings in an
/// effort to reduce our dependence on dynamic memory allocation.
///
/// Also, since it isn't needed and it lets us save on storage requirements,
/// `N` must be less than `256` (so that the length can fit in a `u8`).
///
/// Note that the derived comparison traits look at `bytes` first and `len`
/// second, so the declaration order of the fields below is significant.
#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)]
#[doc(hidden)] // not part of Jiff's public API
pub struct ArrayStr<const N: usize> {
    /// The UTF-8 bytes that make up the string.
    ///
    /// This array---the entire array---is always valid UTF-8. And
    /// the `0..self.len` sub-slice is also always valid UTF-8.
    ///
    /// (The methods below only ever write bytes copied from a `&str` at the
    /// end of the used prefix; everything after that prefix stays zeroed.)
    bytes: [u8; N],
    /// The number of bytes used by the string in `bytes`.
    ///
    /// (We could technically save this byte in some cases and use a NUL
    /// terminator instead. For example, we don't permit NUL bytes in POSIX
    /// time zone abbreviation strings. But this is simpler, is only one byte
    /// and generalizes. And we're not really trying to micro-optimize the
    /// storage requirements when we use these array strings. Or at least, I
    /// don't know of a reason to.)
    len: u8,
}

impl<const N: usize> ArrayStr<N> {
    /// Creates a new fixed capacity string.
    ///
    /// If the given string exceeds `N` bytes, then this returns `None`.
    ///
    /// `None` is also returned when the string is longer than `u8::MAX`
    /// bytes, since such a length cannot be recorded. That can only happen
    /// when `N` itself is bigger than `u8::MAX`, which is not supported (and
    /// is rejected outright when debug assertions are enabled).
    pub(crate) const fn new(s: &str) -> Option<ArrayStr<N>> {
        let src = s.as_bytes();
        if src.len() > N {
            return None;
        }
        // OK because we don't ever use anything bigger than u8::MAX for `N`.
        // And we probably shouldn't, because that would be a pretty chunky
        // array. If such a thing is needed, please file an issue to discuss.
        debug_assert!(N <= u8::MAX as usize, "size of ArrayStr is too big");
        // Debug assertions are compiled out of release builds, so also refuse
        // at runtime any length that would be silently truncated by the cast
        // to `u8` below.
        if src.len() > u8::MAX as usize {
            return None;
        }
        let mut bytes = [0; N];
        // A manual loop because iterators and `copy_from_slice` cannot be
        // relied upon inside a `const fn`.
        let mut i = 0;
        while i < src.len() {
            bytes[i] = src[i];
            i += 1;
        }
        // The cast is lossless: `src.len() <= u8::MAX` was checked above.
        Some(ArrayStr { bytes, len: src.len() as u8 })
    }

    /// Returns the capacity of this array string.
    pub(crate) fn capacity() -> usize {
        N
    }

    /// Append the bytes given to the end of this string.
    ///
    /// If the capacity would be exceeded, then this is a no-op and `false`
    /// is returned. Otherwise, the string is extended and `true` is returned.
    pub(crate) fn push_str(&mut self, s: &str) -> bool {
        let len = usize::from(self.len);
        let Some(new_len) = len.checked_add(s.len()) else { return false };
        if new_len > N {
            return false;
        }
        // OK because we don't ever use anything bigger than u8::MAX for `N`.
        // And we probably shouldn't, because that would be a pretty chunky
        // array. If such a thing is needed, please file an issue to discuss.
        debug_assert!(
            N <= usize::from(u8::MAX),
            "size of ArrayStr is too big"
        );
        // Convert the length *before* touching the buffer. This way, a length
        // that doesn't fit in a `u8` makes this a true no-op instead of
        // panicking after the bytes have already been copied.
        let Ok(new_len_u8) = u8::try_from(new_len) else { return false };
        self.bytes[len..new_len].copy_from_slice(s.as_bytes());
        self.len = new_len_u8;
        true
    }

    /// Returns this array string as a string slice.
    pub(crate) fn as_str(&self) -> &str {
        // OK because construction guarantees valid UTF-8.
        //
        // This is bullet proof enough to use unchecked `str` construction
        // here, but I can't dream up of a benchmark where it matters.
        core::str::from_utf8(&self.bytes[..usize::from(self.len)])
            .expect("ArrayStr always holds valid UTF-8")
    }
}
86 | |
87 | /// Easy construction of `ArrayStr` from `&'static str`. |
88 | /// |
89 | /// We specifically limit to `&'static str` to approximate string literals. |
90 | /// This prevents most cases of accidentally creating a non-string literal |
91 | /// that panics if the string is too big. |
92 | /// |
93 | /// This impl primarily exists to make writing tests more convenient. |
94 | impl<const N: usize> From<&'static str> for ArrayStr<N> { |
95 | fn from(s: &'static str) -> ArrayStr<N> { |
96 | ArrayStr::new(s).unwrap() |
97 | } |
98 | } |
99 | |
100 | impl<const N: usize> PartialEq<str> for ArrayStr<N> { |
101 | fn eq(&self, rhs: &str) -> bool { |
102 | self.as_str() == rhs |
103 | } |
104 | } |
105 | |
106 | impl<const N: usize> PartialEq<&str> for ArrayStr<N> { |
107 | fn eq(&self, rhs: &&str) -> bool { |
108 | self.as_str() == *rhs |
109 | } |
110 | } |
111 | |
112 | impl<const N: usize> PartialEq<ArrayStr<N>> for str { |
113 | fn eq(&self, rhs: &ArrayStr<N>) -> bool { |
114 | self == rhs.as_str() |
115 | } |
116 | } |
117 | |
118 | impl<const N: usize> core::fmt::Debug for ArrayStr<N> { |
119 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
120 | core::fmt::Debug::fmt(self.as_str(), f) |
121 | } |
122 | } |
123 | |
124 | impl<const N: usize> core::fmt::Display for ArrayStr<N> { |
125 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
126 | core::fmt::Display::fmt(self.as_str(), f) |
127 | } |
128 | } |
129 | |
130 | impl<const N: usize> core::fmt::Write for ArrayStr<N> { |
131 | fn write_str(&mut self, s: &str) -> core::fmt::Result { |
132 | if self.push_str(s) { |
133 | Ok(()) |
134 | } else { |
135 | Err(core::fmt::Error) |
136 | } |
137 | } |
138 | } |
139 | |
140 | impl<const N: usize> AsRef<str> for ArrayStr<N> { |
141 | fn as_ref(&self) -> &str { |
142 | self.as_str() |
143 | } |
144 | } |
145 | |
/// A self-imposed limit on the size of a time zone abbreviation, in bytes.
///
/// POSIX says this:
///
/// > Indicate no less than three, nor more than {TZNAME_MAX}, bytes that are
/// > the designation for the standard (std) or the alternative (dst -such as
/// > Daylight Savings Time) timezone.
///
/// But it doesn't seem worth the trouble to query `TZNAME_MAX`. Interestingly,
/// IANA says:
///
/// > are 3 or more characters specifying the standard and daylight saving time
/// > (DST) zone abbreviations
///
/// Which implies that IANA thinks there is no limit. But that seems unwise.
/// Moreover, in practice, it seems like the `date` utility supports fairly
/// long abbreviations. On my Mac (so, BSD `date` as I understand it):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 20:05:58 YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// And on my Linux machine (so, GNU `date`):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 08:05:36 PM YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// I don't know exactly what limit these programs use, but 30 seems good
/// enough?
///
/// (Previously, I had been using 255 and stuffing the string in a `Box<str>`.
/// But as part of work on [#168], I was looking to remove allocation from as
/// many places as possible. And this was one candidate. But making room on the
/// stack for 255 byte abbreviations seemed gratuitous. So I picked something
/// smaller. If we come across an abbreviation bigger than this max, then we'll
/// error.)
///
/// [#168]: https://github.com/BurntSushi/jiff/issues/168
const ABBREVIATION_MAX: usize = 30;
188 | |
/// A type alias for centralizing the definition of a time zone abbreviation.
///
/// Basically, this creates one single coherent place where we control the
/// length of a time zone abbreviation: an `Abbreviation` is an `ArrayStr`
/// whose capacity is `ABBREVIATION_MAX` bytes.
#[doc (hidden)] // not part of Jiff's public API
pub type Abbreviation = ArrayStr<ABBREVIATION_MAX>;
195 | |
#[cfg(test)]
mod tests {
    use core::fmt::Write;

    use super::*;

    /// Writing through `core::fmt::Write` appends until the capacity is
    /// reached, and a write that doesn't fit fails without changing the
    /// string.
    #[test]
    fn fmt_write() {
        let mut dst = ArrayStr::<5>::new("").unwrap();
        assert!(write!(&mut dst, "abcd").is_ok());
        assert_eq!(dst.as_str(), "abcd");
        // Filling the string up to exactly its capacity is fine.
        assert!(write!(&mut dst, "e").is_ok());
        assert_eq!(dst.as_str(), "abcde");
        // One byte too many: the write is rejected and nothing is appended.
        assert!(write!(&mut dst, "f").is_err());
        assert_eq!(dst.as_str(), "abcde");
    }
}
210 | |