lazy.rs source code [crates/regex-automata-0.4.9/src/util/lazy.rs]

1	/!*
2	A lazily initialized value for safe sharing between threads.
3
4	The principal type in this module is `Lazy`, which makes it easy to construct
5	values that are shared safely across multiple threads simultaneously.
6	*/
7
8	use core::fmt;
9
10	/// A lazily initialized value that implements `Deref` for `T`.
11	///
12	/// A `Lazy` takes an initialization function and permits callers from any
13	/// thread to access the result of that initialization function in a safe
14	/// manner. In effect, this permits one-time initialization of global resources
15	/// in a (possibly) multi-threaded program.
16	///
17	/// This type and its functionality are available even when neither the `alloc`
18	/// nor the `std` features are enabled. In exchange, a `Lazy` does not
19	/// guarantee that the given `create` function is called at most once. It
20	/// might be called multiple times. Moreover, a call to `Lazy::get` (either
21	/// explicitly or implicitly via `Lazy`'s `Deref` impl) may block until a `T`
22	/// is available.
23	///
24	/// This is very similar to `lazy_static` or `once_cell`, except it doesn't
25	/// guarantee that the initialization function will be run once and it works
26	/// in no-alloc no-std environments. With that said, if you need stronger
27	/// guarantees or a more flexible API, then it is recommended to use either
28	/// `lazy_static` or `once_cell`.
29	///
30	/// # Warning: may use a spin lock
31	///
32	/// When this crate is compiled _without_ the `alloc` feature, then this type
33	/// may used a spin lock internally. This can have subtle effects that may
34	/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more
35	/// thorough treatment of this topic.
36	///
37	/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
38	///
39	/// # Example
40	///
41	/// This type is useful for creating regexes once, and then using them from
42	/// multiple threads simultaneously without worrying about synchronization.
43	///
44	/// ```
45	/// use regex_automata::{dfa::regex::Regex, util::lazy::Lazy, Match};
46	///
47	/// static RE: Lazy<Regex> = Lazy::new(\|\| Regex::new("foo[0-9]+bar").unwrap());
48	///
49	/// let expected = Some(Match::must(`0`, `3`..`14`));
50	/// assert_eq!(expected, RE.find(b"zzzfoo12345barzzz"));
51	/// ```
52	pub struct Lazy<T, F = fn() -> T>(lazy::Lazy<T, F>);
53
54	impl<T, F> Lazy<T, F> {
55	/// Create a new `Lazy` value that is initialized via the given function.
56	///
57	/// The `T` type is automatically inferred from the return type of the
58	/// `create` function given.
59	pub const fn new(create: F) -> Lazy<T, F> {
60	Lazy(lazy::Lazy::new(create))
61	}
62	}
63
64	impl<T, F: Fn() -> T> Lazy<T, F> {
65	/// Return a reference to the lazily initialized value.
66	///
67	/// This routine may block if another thread is initializing a `T`.
68	///
69	/// Note that given a `x` which has type `Lazy`, this must be called via
70	/// `Lazy::get(x)` and not `x.get()`. This routine is defined this way
71	/// because `Lazy` impls `Deref` with a target of `T`.
72	///
73	/// # Panics
74	///
75	/// This panics if the `create` function inside this lazy value panics.
76	/// If the panic occurred in another thread, then this routine _may_ also
77	/// panic (but is not guaranteed to do so).
78	pub fn get(this: &Lazy<T, F>) -> &T {
79	this.0.get()
80	}
81	}
82
83	impl<T, F: Fn() -> T> core::ops::Deref for Lazy<T, F> {
84	type Target = T;
85
86	fn deref(&self) -> &T {
87	Lazy::get(self)
88	}
89	}
90
91	impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
92	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
93	self.0.fmt(f)
94	}
95	}
96
97	#[cfg(feature = "alloc")]
98	mod lazy {
99	use core::{
100	fmt,
101	marker::PhantomData,
102	sync::atomic::{AtomicPtr, Ordering},
103	};
104
105	use alloc::boxed::Box;
106
107	/// A non-std lazy initialized value.
108	///
109	/// This might run the initialization function more than once, but will
110	/// never block.
111	///
112	/// I wish I could get these semantics into the non-alloc non-std Lazy
113	/// type below, but I'm not sure how to do it. If you can do an alloc,
114	/// then the implementation becomes very simple if you don't care about
115	/// redundant work precisely because a pointer can be atomically swapped.
116	///
117	/// Perhaps making this approach work in the non-alloc non-std case
118	/// requires asking the caller for a pointer? It would make the API less
119	/// convenient I think.
120	pub(super) struct Lazy<T, F> {
121	data: AtomicPtr<T>,
122	create: F,
123	// This indicates to the compiler that this type can drop T. It's not
124	// totally clear how the absence of this marker could lead to trouble,
125	// but putting here doesn't have any downsides so we hedge until somone
126	// can from the Unsafe Working Group can tell us definitively that we
127	// don't need it.
128	//
129	// See: https://github.com/BurntSushi/regex-automata/issues/30
130	owned: PhantomData<Box<T>>,
131	}
132
133	// SAFETY: So long as T and &T (and F and &F) can themselves be safely
134	// shared among threads, so to can a Lazy<T, _>. Namely, the Lazy API only
135	// permits accessing a &T and initialization is free of data races. So if T
136	// is thread safe, then so to is Lazy<T, _>.
137	//
138	// We specifically require that T: Send in order for Lazy<T> to be Sync.
139	// Without that requirement, it's possible to send a T from one thread to
140	// another via Lazy's destructor.
141	//
142	// It's not clear whether we need F: Send+Sync for Lazy to be Sync. But
143	// we're conservative for now and keep both.
144	unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
145
146	impl<T, F> Lazy<T, F> {
147	/// Create a new alloc but non-std lazy value that is racily
148	/// initialized. That is, the 'create' function may be called more than
149	/// once.
150	pub(super) const fn new(create: F) -> Lazy<T, F> {
151	Lazy {
152	data: AtomicPtr::new(core::ptr::null_mut()),
153	create,
154	owned: PhantomData,
155	}
156	}
157	}
158
159	impl<T, F: Fn() -> T> Lazy<T, F> {
160	/// Get the underlying lazy value. If it hasn't been initialized
161	/// yet, then always attempt to initialize it (even if some other
162	/// thread is initializing it) and atomically attach it to this lazy
163	/// value before returning it.
164	pub(super) fn get(&self) -> &T {
165	if let Some(data) = self.poll() {
166	return data;
167	}
168	let data = (self.create)();
169	let mut ptr = Box::into_raw(Box::new(data));
170	// We attempt to stuff our initialized value into our atomic
171	// pointer. Upon success, we don't need to do anything. But if
172	// someone else beat us to the punch, then we need to make sure
173	// our newly created value is dropped.
174	let result = self.data.compare_exchange(
175	core::ptr::null_mut(),
176	ptr,
177	Ordering::AcqRel,
178	Ordering::Acquire,
179	);
180	if let Err(old) = result {
181	// SAFETY: We created 'ptr' via Box::into_raw above, so turning
182	// it back into a Box via from_raw is safe.
183	drop(unsafe { Box::from_raw(ptr) });
184	ptr = old;
185	}
186	// SAFETY: We just set the pointer above to a non-null value, even
187	// in the error case, and set it to a fully initialized value
188	// returned by 'create'.
189	unsafe { &*ptr }
190	}
191
192	/// If this lazy value has been initialized successfully, then return
193	/// that value. Otherwise return None immediately. This never attempts
194	/// to run initialization itself.
195	fn poll(&self) -> Option<&T> {
196	let ptr = self.data.load(Ordering::Acquire);
197	if ptr.is_null() {
198	return None;
199	}
200	// SAFETY: We just checked that the pointer is not null. Since it's
201	// not null, it must have been fully initialized by 'get' at some
202	// point.
203	Some(unsafe { &*ptr })
204	}
205	}
206
207	impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> {
208	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
209	f.debug_struct("Lazy").field("data", &self.poll()).finish()
210	}
211	}
212
213	impl<T, F> Drop for Lazy<T, F> {
214	fn drop(&mut self) {
215	let ptr = *self.data.get_mut();
216	if !ptr.is_null() {
217	// SAFETY: We just checked that 'ptr' is not null. And since
218	// we have exclusive access, there are no races to worry about.
219	drop(unsafe { Box::from_raw(ptr) });
220	}
221	}
222	}
223	}
224
225	#[cfg(not(feature = "alloc"))]
226	mod lazy {
227	use core::{
228	cell::Cell,
229	fmt,
230	mem::MaybeUninit,
231	panic::{RefUnwindSafe, UnwindSafe},
232	sync::atomic::{AtomicU8, Ordering},
233	};
234
235	/// Our 'Lazy' value can be in one of three states:
236	///
237	/// INIT is where it starts, and also ends up back here if the*
238	/// 'create' routine panics.
239	/// BUSY is where it sits while initialization is running in exactly*
240	/// one thread.
241	/// DONE is where it sits after 'create' has completed and 'data' has*
242	/// been fully initialized.
243	const LAZY_STATE_INIT: u8 = `0`;
244	const LAZY_STATE_BUSY: u8 = `1`;
245	const LAZY_STATE_DONE: u8 = `2`;
246
247	/// A non-alloc non-std lazy initialized value.
248	///
249	/// This guarantees initialization only happens once, but uses a spinlock
250	/// to block in the case of simultaneous access. Blocking occurs so that
251	/// one thread waits while another thread initializes the value.
252	///
253	/// I would much rather have the semantics of the 'alloc' Lazy type above.
254	/// Namely, that we might run the initialization function more than once,
255	/// but we never otherwise block. However, I don't know how to do that in
256	/// a non-alloc non-std context.
257	pub(super) struct Lazy<T, F> {
258	state: AtomicU8,
259	create: Cell<Option<F>>,
260	data: Cell<MaybeUninit<T>>,
261	}
262
263	// SAFETY: So long as T and &T (and F and &F) can themselves be safely
264	// shared among threads, so to can a Lazy<T, _>. Namely, the Lazy API only
265	// permits accessing a &T and initialization is free of data races. So if T
266	// is thread safe, then so to is Lazy<T, _>.
267	unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
268	// A reference to a Lazy is unwind safe because we specifically take
269	// precautions to poison all accesses to a Lazy if the caller-provided
270	// 'create' function panics.
271	impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe
272	for Lazy<T, F>
273	{
274	}
275
276	impl<T, F> Lazy<T, F> {
277	/// Create a new non-alloc non-std lazy value that is initialized
278	/// exactly once on first use using the given function.
279	pub(super) const fn new(create: F) -> Lazy<T, F> {
280	Lazy {
281	state: AtomicU8::new(LAZY_STATE_INIT),
282	create: Cell::new(Some(create)),
283	data: Cell::new(MaybeUninit::uninit()),
284	}
285	}
286	}
287
288	impl<T, F: FnOnce() -> T> Lazy<T, F> {
289	/// Get the underlying lazy value. If it isn't been initialized
290	/// yet, then either initialize it or block until some other thread
291	/// initializes it. If the 'create' function given to Lazy::new panics
292	/// (even in another thread), then this panics too.
293	pub(super) fn get(&self) -> &T {
294	// This is effectively a spinlock. We loop until we enter a DONE
295	// state, and if possible, initialize it ourselves. The only way
296	// we exit the loop is if 'create' panics, we initialize 'data' or
297	// some other thread initializes 'data'.
298	//
299	// Yes, I have read spinlocks considered harmful[1]. And that
300	// article is why this spinlock is only active when 'alloc' isn't
301	// enabled. I did this because I don't think there is really
302	// another choice without 'alloc', other than not providing this at
303	// all. But I think that's a big bummer.
304	//
305	// [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
306	while self.state.load(Ordering::Acquire) != LAZY_STATE_DONE {
307	// Check if we're the first ones to get here. If so, we'll be
308	// the ones who initialize.
309	let result = self.state.compare_exchange(
310	LAZY_STATE_INIT,
311	LAZY_STATE_BUSY,
312	Ordering::AcqRel,
313	Ordering::Acquire,
314	);
315	// This means we saw the INIT state and nobody else can. So we
316	// must take responsibility for initializing. And by virtue of
317	// observing INIT, we have also told anyone else trying to
318	// get here that we are BUSY. If someone else sees BUSY, then
319	// they will spin until we finish initialization.
320	if let Ok(_) = result {
321	// Since we are guaranteed to be the only ones here, we
322	// know that 'create' is there... Unless someone else got
323	// here before us and 'create' panicked. In which case,
324	// 'self.create' is now 'None' and we forward the panic
325	// to the caller. (i.e., We implement poisoning.)
326	//
327	// SAFETY: Our use of 'self.state' guarantees that we are
328	// the only thread executing this line, and thus there are
329	// no races.
330	let create = unsafe {
331	(*self.create.as_ptr()).take().expect(
332	"Lazy's create function panicked, \
333	preventing initialization,
334	poisoning current thread",
335	)
336	};
337	let guard = Guard { state: &self.state };
338	// SAFETY: Our use of 'self.state' guarantees that we are
339	// the only thread executing this line, and thus there are
340	// no races.
341	unsafe {
342	(*self.data.as_ptr()).as_mut_ptr().write(create());
343	}
344	// All is well. 'self.create' ran successfully, so we
345	// forget the guard.
346	core::mem::forget(guard);
347	// Everything is initialized, so we can declare success.
348	self.state.store(LAZY_STATE_DONE, Ordering::Release);
349	break;
350	}
351	core::hint::spin_loop();
352	}
353	// We only get here if data is fully initialized, and thus poll
354	// will always return something.
355	self.poll().unwrap()
356	}
357
358	/// If this lazy value has been initialized successfully, then return
359	/// that value. Otherwise return None immediately. This never blocks.
360	fn poll(&self) -> Option<&T> {
361	if self.state.load(Ordering::Acquire) == LAZY_STATE_DONE {
362	// SAFETY: The DONE state only occurs when data has been fully
363	// initialized.
364	Some(unsafe { &(self.data.as_ptr()).as_ptr() })
365	} else {
366	None
367	}
368	}
369	}
370
371	impl<T: fmt::Debug, F: FnMut() -> T> fmt::Debug for Lazy<T, F> {
372	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
373	f.debug_struct("Lazy")
374	.field("state", &self.state.load(Ordering::Acquire))
375	.field("create", &"<closure>")
376	.field("data", &self.poll())
377	.finish()
378	}
379	}
380
381	impl<T, F> Drop for Lazy<T, F> {
382	fn drop(&mut self) {
383	if *self.state.get_mut() == LAZY_STATE_DONE {
384	// SAFETY: state is DONE if and only if data has been fully
385	// initialized. At which point, it is safe to drop.
386	unsafe {
387	self.data.get_mut().assume_init_drop();
388	}
389	}
390	}
391	}
392
393	/// A guard that will reset a Lazy's state back to INIT when dropped. The
394	/// idea here is to 'forget' this guard on success. On failure (when a
395	/// panic occurs), the Drop impl runs and causes all in-progress and future
396	/// 'get' calls to panic. Without this guard, all in-progress and future
397	/// 'get' calls would spin forever. Crashing is much better than getting
398	/// stuck in an infinite loop.
399	struct Guard<'a> {
400	state: &'a AtomicU8,
401	}
402
403	impl<'a> Drop for Guard<'a> {
404	fn drop(&mut self) {
405	// We force ourselves back into an INIT state. This will in turn
406	// cause any future 'get' calls to attempt calling 'self.create'
407	// again which will in turn panic because 'self.create' will now
408	// be 'None'.
409	self.state.store(LAZY_STATE_INIT, Ordering::Release);
410	}
411	}
412	}
413
414	#[cfg(test)]
415	mod tests {
416	use super::*;
417
418	fn assert_send<T: Send>() {}
419	fn assert_sync<T: Sync>() {}
420	fn assert_unwind<T: core::panic::UnwindSafe>() {}
421	fn assert_refunwind<T: core::panic::RefUnwindSafe>() {}
422
423	#[test]
424	fn oibits() {
425	assert_send::<Lazy<u64>>();
426	assert_sync::<Lazy<u64>>();
427	assert_unwind::<Lazy<u64>>();
428	assert_refunwind::<Lazy<u64>>();
429	}
430
431	// This is a regression test because we used to rely on the inferred Sync
432	// impl for the Lazy type defined above (for 'alloc' mode). In the
433	// inferred impl, it only requires that T: Sync for Lazy<T>: Sync. But
434	// if we have that, we can actually make use of the fact that Lazy<T> drops
435	// T to create a value on one thread and drop it on another. This should
436	// require T: Send, but our missing bounds before let it sneak by.
437	//
438	// Basically, this test should not compile, so we... comment it out. We
439	// don't have a great way of testing compile-fail tests right now.
440	//
441	// See: https://github.com/BurntSushi/regex-automata/issues/30
442	/*
443	#[test]
444	fn sync_not_send() {
445	#[allow(dead_code)]
446	fn inner<T: Sync + Default>() {
447	let lazy = Lazy::new(move \|\| T::default());
448	std::thread::scope(\|scope\| {
449	scope.spawn(\|\| {
450	Lazy::get(&lazy); // We create T in this thread
451	});
452	});
453	// And drop in this thread.
454	drop(lazy);
455	// So we have send a !Send type over threads. (with some more
456	// legwork, its possible to even sneak the value out of drop
457	// through thread local)
458	}
459	}
460	*/
461	}
462