// array_str.rs source code [crates/jiff/src/shared/util/array_str.rs]

/// A simple and not the most-efficient fixed size string on the stack.
///
/// This supplanted some uses of `Box<str>` for storing tiny strings in an
/// effort to reduce our dependence on dynamic memory allocation.
///
/// Also, since it isn't needed and it lets us save on storage requirements,
/// `N` must be less than `256` (so that the length can fit in a `u8`).
/// `ArrayStr::new` panics instead of producing a value when `N` is bigger
/// than that.
#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)]
#[doc(hidden)] // not part of Jiff's public API
pub struct ArrayStr<const N: usize> {
    /// The UTF-8 bytes that make up the string.
    ///
    /// This array---the entire array---is always valid UTF-8. And
    /// the `0..self.len` sub-slice is also always valid UTF-8.
    ///
    /// `new` zero-fills the array and `push_str` only ever writes the bytes
    /// it appends, so the unused tail stays zeroed. The derived comparison
    /// and hashing impls look at the whole array, so they rely on that.
    bytes: [u8; N],
    /// The number of bytes used by the string in `bytes`.
    ///
    /// (We could technically save this byte in some cases and use a NUL
    /// terminator. For example, since we don't permit NUL bytes in POSIX time
    /// zone abbreviation strings, but this is simpler and only one byte and
    /// generalizes. And we're not really trying to micro-optimize the storage
    /// requirements when we use these array strings. Or at least, I don't know
    /// of a reason to.)
    len: u8,
}

impl<const N: usize> ArrayStr<N> {
    /// Creates a new fixed capacity string.
    ///
    /// If the given string exceeds `N` bytes, then this returns
    /// `None`.
    ///
    /// # Panics
    ///
    /// When the string fits but `N` is bigger than `u8::MAX`, since the
    /// length could then not be stored faithfully in a `u8`.
    pub(crate) const fn new(s: &str) -> Option<ArrayStr<N>> {
        let src = s.as_bytes();
        let len = src.len();
        if len > N {
            return None;
        }
        // We don't ever use anything bigger than u8::MAX for `N`. And we
        // probably shouldn't, because that would be a pretty chunky array.
        // If such a thing is needed, please file an issue to discuss.
        //
        // This is a hard assertion (and not a `debug_assert!`) because the
        // `len as u8` cast below would otherwise silently truncate in
        // builds without debug assertions. `N` is a constant, so the check
        // is resolved per instantiation.
        assert!(N <= u8::MAX as usize, "size of ArrayStr is too big");
        let mut bytes = [0; N];
        // `for` loops and iterators are not available in a `const fn`,
        // hence the manual index loop.
        let mut i = 0;
        while i < len {
            bytes[i] = src[i];
            i += 1;
        }
        // The cast is lossless: `len <= N <= u8::MAX` was checked above.
        Some(ArrayStr { bytes, len: len as u8 })
    }

    /// Returns the capacity of this array string, i.e., `N`.
    pub(crate) const fn capacity() -> usize {
        N
    }

    /// Append the bytes given to the end of this string.
    ///
    /// If the capacity would be exceeded, then this is a no-op and `false`
    /// is returned. Otherwise, `true` is returned.
    pub(crate) fn push_str(&mut self, s: &str) -> bool {
        let len = usize::from(self.len);
        let Some(new_len) = len.checked_add(s.len()) else { return false };
        if new_len > N {
            return false;
        }
        self.bytes[len..new_len].copy_from_slice(s.as_bytes());
        // OK because we don't ever use anything bigger than u8::MAX for `N`.
        // And we probably shouldn't, because that would be a pretty chunky
        // array. If such a thing is needed, please file an issue to discuss.
        debug_assert!(
            N <= usize::from(u8::MAX),
            "size of ArrayStr is too big"
        );
        self.len = u8::try_from(new_len)
            .expect("length of ArrayStr must fit in a u8");
        true
    }

    /// Returns this array string as a string slice.
    pub(crate) fn as_str(&self) -> &str {
        let used = &self.bytes[..usize::from(self.len)];
        // OK because construction guarantees valid UTF-8.
        //
        // This is bullet proof enough to use unchecked `str` construction
        // here, but I can't dream up of a benchmark where it matters.
        core::str::from_utf8(used)
            .expect("ArrayStr always holds valid UTF-8")
    }
}
86
87	/// Easy construction of `ArrayStr` from `&'static str`.
88	///
89	/// We specifically limit to `&'static str` to approximate string literals.
90	/// This prevents most cases of accidentally creating a non-string literal
91	/// that panics if the string is too big.
92	///
93	/// This impl primarily exists to make writing tests more convenient.
94	impl<const N: usize> From<&'static str> for ArrayStr<N> {
95	fn from(s: &'static str) -> ArrayStr<N> {
96	ArrayStr::new(s).unwrap()
97	}
98	}
99
100	impl<const N: usize> PartialEq<str> for ArrayStr<N> {
101	fn eq(&self, rhs: &str) -> bool {
102	self.as_str() == rhs
103	}
104	}
105
106	impl<const N: usize> PartialEq<&str> for ArrayStr<N> {
107	fn eq(&self, rhs: &&str) -> bool {
108	self.as_str() == *rhs
109	}
110	}
111
112	impl<const N: usize> PartialEq<ArrayStr<N>> for str {
113	fn eq(&self, rhs: &ArrayStr<N>) -> bool {
114	self == rhs.as_str()
115	}
116	}
117
118	impl<const N: usize> core::fmt::Debug for ArrayStr<N> {
119	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
120	core::fmt::Debug::fmt(self.as_str(), f)
121	}
122	}
123
124	impl<const N: usize> core::fmt::Display for ArrayStr<N> {
125	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
126	core::fmt::Display::fmt(self.as_str(), f)
127	}
128	}
129
130	impl<const N: usize> core::fmt::Write for ArrayStr<N> {
131	fn write_str(&mut self, s: &str) -> core::fmt::Result {
132	if self.push_str(s) {
133	Ok(())
134	} else {
135	Err(core::fmt::Error)
136	}
137	}
138	}
139
140	impl<const N: usize> AsRef<str> for ArrayStr<N> {
141	fn as_ref(&self) -> &str {
142	self.as_str()
143	}
144	}
145
/// A self-imposed limit on the size of a time zone abbreviation, in bytes.
///
/// POSIX says this:
///
/// > Indicate no less than three, nor more than {TZNAME_MAX}, bytes that are
/// > the designation for the standard (std) or the alternative (dst -such as
/// > Daylight Savings Time) timezone.
///
/// But it doesn't seem worth the trouble to query `TZNAME_MAX`. Interestingly,
/// IANA says:
///
/// > are 3 or more characters specifying the standard and daylight saving time
/// > (DST) zone abbreviations
///
/// Which implies that IANA thinks there is no limit. But that seems unwise.
/// Moreover, in practice, it seems like the `date` utility supports fairly
/// long abbreviations. On my mac (so, BSD `date` as I understand it):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 20:05:58 YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// And on my Linux machine (so, GNU `date`):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 08:05:36 PM YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// I don't know exactly what limit these programs use, but 30 seems good
/// enough?
///
/// (Previously, I had been using 255 and stuffing the string in a `Box<str>`.
/// But as part of work on [#168], I was looking to remove allocation from as
/// many places as possible. And this was one candidate. But making room on the
/// stack for 255 byte abbreviations seemed gratuitous. So I picked something
/// smaller. If we come across an abbreviation bigger than this max, then we'll
/// error.)
///
/// [#168]: https://github.com/BurntSushi/jiff/issues/168
const ABBREVIATION_MAX: usize = 30;
188
189	/// A type alias for centralizing the definition of a time zone abbreviation.
190	///
191	/// Basically, this creates one single coherent place where we control the
192	/// length of a time zone abbreviation.
193	#[doc(hidden)] // not part of Jiff's public API
194	pub type Abbreviation = ArrayStr<ABBREVIATION_MAX>;
195
#[cfg(test)]
mod tests {
    use core::fmt::Write;

    use super::*;

    /// Formatted writes succeed while they fit into the capacity, and a
    /// write that does not fit fails and leaves the contents untouched.
    #[test]
    fn fmt_write() {
        let mut dst = ArrayStr::<5>::new("").unwrap();
        assert!(write!(&mut dst, "abcd").is_ok());
        assert!(write!(&mut dst, "e").is_ok());
        assert!(write!(&mut dst, "f").is_err());
        assert_eq!(dst.as_str(), "abcde");
    }
}
210