automaton.rs source code [crates/regex_automata/src/dfa/automaton.rs]

1	use crate::{
2	dfa::search,
3	util::{
4	id::{PatternID, StateID},
5	matchtypes::{HalfMatch, MatchError},
6	prefilter,
7	},
8	};
9
10	/// A trait describing the interface of a deterministic finite automaton (DFA).
11	///
12	/// The complexity of this trait probably means that it's unlikely for others
13	/// to implement it. The primary purpose of the trait is to provide for a way
14	/// of abstracting over different types of DFAs. In this crate, that means
15	/// dense DFAs and sparse DFAs. (Dense DFAs are fast but memory hungry, where
16	/// as sparse DFAs are slower but come with a smaller memory footprint. But
17	/// they otherwise provide exactly equivalent expressive power.) For example, a
18	/// [`dfa::regex::Regex`](crate::dfa::regex::Regex) is generic over this trait.
19	///
20	/// Normally, a DFA's execution model is very simple. You might have a single
21	/// start state, zero or more final or "match" states and a function that
22	/// transitions from one state to the next given the next byte of input.
23	/// Unfortunately, the interface described by this trait is significantly
24	/// more complicated than this. The complexity has a number of different
25	/// reasons, mostly motivated by performance, functionality or space savings:
26	///
27	/// A DFA can search for multiple patterns simultaneously. This*
28	/// means extra information is returned when a match occurs. Namely,
29	/// a match is not just an offset, but an offset plus a pattern ID.
30	/// [`Automaton::pattern_count`] returns the number of patterns compiled into
31	/// the DFA, [`Automaton::match_count`] returns the total number of patterns
32	/// that match in a particular state and [`Automaton::match_pattern`] permits
33	/// iterating over the patterns that match in a particular state.
34	/// A DFA can have multiple start states, and the choice of which start*
35	/// state to use depends on the content of the string being searched and
36	/// position of the search, as well as whether the search is an anchored
37	/// search for a specific pattern in the DFA. Moreover, computing the start
38	/// state also depends on whether you're doing a forward or a reverse search.
39	/// [`Automaton::start_state_forward`] and [`Automaton::start_state_reverse`]
40	/// are used to compute the start state for forward and reverse searches,
41	/// respectively.
42	/// All matches are delayed by one byte to support things like `$` and `\b`*
43	/// at the end of a pattern. Therefore, every use of a DFA is required to use
44	/// [`Automaton::next_eoi_state`]
45	/// at the end of the search to compute the final transition.
46	/// For optimization reasons, some states are treated specially. Every*
47	/// state is either special or not, which can be determined via the
48	/// [`Automaton::is_special_state`] method. If it's special, then the state
49	/// must be at least one of a few possible types of states. (Note that some
50	/// types can overlap, for example, a match state can also be an accel state.
51	/// But some types can't. If a state is a dead state, then it can never be any
52	/// other type of state.) Those types are:
53	/// A dead state. A dead state means the DFA will never enter a match*
54	/// state. This can be queried via the [`Automaton::is_dead_state`] method.
55	/// A quit state. A quit state occurs if the DFA had to stop the search*
56	/// prematurely for some reason. This can be queried via the
57	/// [`Automaton::is_quit_state`] method.
58	/// A match state. A match state occurs when a match is found. When a DFA*
59	/// enters a match state, the search may stop immediately (when looking
60	/// for the earliest match), or it may continue to find the leftmost-first
61	/// match. This can be queried via the [`Automaton::is_match_state`]
62	/// method.
63	/// A start state. A start state is where a search begins. For every*
64	/// search, there is exactly one start state that is used, however, a
65	/// DFA may contain many start states. When the search is in a start
66	/// state, it may use a prefilter to quickly skip to candidate matches
67	/// without executing the DFA on every byte. This can be queried via the
68	/// [`Automaton::is_start_state`] method.
69	/// An accel state. An accel state is a state that is accelerated.*
70	/// That is, it is a state where _most_ of its transitions loop back to
71	/// itself and only a small number of transitions lead to other states.
72	/// This kind of state is said to be accelerated because a search routine
73	/// can quickly look for the bytes leading out of the state instead of
74	/// continuing to execute the DFA on each byte. This can be queried via the
75	/// [`Automaton::is_accel_state`] method. And the bytes that lead out of
76	/// the state can be queried via the [`Automaton::accelerator`] method.
77	///
78	/// There are a number of provided methods on this trait that implement
79	/// efficient searching (for forwards and backwards) with a DFA using all of
80	/// the above features of this trait. In particular, given the complexity of
81	/// all these features, implementing a search routine in this trait is not
82	/// straight forward. If you need to do this for specialized reasons, then
83	/// it's recommended to look at the source of this crate. It is intentionally
84	/// well commented to help with this. With that said, it is possible to
85	/// somewhat simplify the search routine. For example, handling accelerated
86	/// states is strictly optional, since it is always correct to assume that
87	/// `Automaton::is_accel_state` returns false. However, one complex part of
88	/// writing a search routine using this trait is handling the 1-byte delay of a
89	/// match. That is not optional.
90	///
91	/// # Safety
92	///
93	/// This trait is unsafe to implement because DFA searching may rely on the
94	/// correctness of the implementation for memory safety. For example, DFA
95	/// searching may use explicit bounds check elision, which will in turn rely
96	/// on the correctness of every function that returns a state ID.
97	///
98	/// When implementing this trait, one must uphold the documented correctness
99	/// guarantees. Otherwise, undefined behavior may occur.
100	pub unsafe trait Automaton {
101	/// Transitions from the current state to the next state, given the next
102	/// byte of input.
103	///
104	/// Implementations must guarantee that the returned ID is always a valid
105	/// ID when `current` refers to a valid ID. Moreover, the transition
106	/// function must be defined for all possible values of `input`.
107	///
108	/// # Panics
109	///
110	/// If the given ID does not refer to a valid state, then this routine
111	/// may panic but it also may not panic and instead return an invalid ID.
112	/// However, if the caller provides an invalid ID then this must never
113	/// sacrifice memory safety.
114	///
115	/// # Example
116	///
117	/// This shows a simplistic example for walking a DFA for a given haystack
118	/// by using the `next_state` method.
119	///
120	/// ```
121	/// use regex_automata::dfa::{Automaton, dense};
122	///
123	/// let dfa = dense::DFA::new(r"[a-z]+r")?;
124	/// let haystack = "bar".as_bytes();
125	///
126	/// // The start state is determined by inspecting the position and the
127	/// // initial bytes of the haystack.
128	/// let mut state = dfa.start_state_forward(
129	/// None, haystack, `0`, haystack.len(),
130	/// );
131	/// // Walk all the bytes in the haystack.
132	/// for &b in haystack {
133	/// state = dfa.next_state(state, b);
134	/// }
135	/// // Matches are always delayed by 1 byte, so we must explicitly walk the
136	/// // special "EOI" transition at the end of the search.
137	/// state = dfa.next_eoi_state(state);
138	/// assert!(dfa.is_match_state(state));
139	///
140	/// # Ok::<(), Box<dyn std::error::Error>>(())
141	/// ```
142	fn next_state(&self, current: StateID, input: u8) -> StateID;
143
144	/// Transitions from the current state to the next state, given the next
145	/// byte of input.
146	///
147	/// Unlike [`Automaton::next_state`], implementations may implement this
148	/// more efficiently by assuming that the `current` state ID is valid.
149	/// Typically, this manifests by eliding bounds checks.
150	///
151	/// # Safety
152	///
153	/// Callers of this method must guarantee that `current` refers to a valid
154	/// state ID. If `current` is not a valid state ID for this automaton, then
155	/// calling this routine may result in undefined behavior.
156	///
157	/// If `current` is valid, then implementations must guarantee that the ID
158	/// returned is valid for all possible values of `input`.
159	unsafe fn next_state_unchecked(
160	&self,
161	current: StateID,
162	input: u8,
163	) -> StateID;
164
165	/// Transitions from the current state to the next state for the special
166	/// EOI symbol.
167	///
168	/// Implementations must guarantee that the returned ID is always a valid
169	/// ID when `current` refers to a valid ID.
170	///
171	/// This routine must be called at the end of every search in a correct
172	/// implementation of search. Namely, DFAs in this crate delay matches
173	/// by one byte in order to support look-around operators. Thus, after
174	/// reaching the end of a haystack, a search implementation must follow one
175	/// last EOI transition.
176	///
177	/// It is best to think of EOI as an additional symbol in the alphabet of
178	/// a DFA that is distinct from every other symbol. That is, the alphabet
179	/// of DFAs in this crate has a logical size of 257 instead of 256, where
180	/// 256 corresponds to every possible inhabitant of `u8`. (In practice, the
181	/// physical alphabet size may be smaller because of alphabet compression
182	/// via equivalence classes, but EOI is always represented somehow in the
183	/// alphabet.)
184	///
185	/// # Panics
186	///
187	/// If the given ID does not refer to a valid state, then this routine
188	/// may panic but it also may not panic and instead return an invalid ID.
189	/// However, if the caller provides an invalid ID then this must never
190	/// sacrifice memory safety.
191	///
192	/// # Example
193	///
194	/// This shows a simplistic example for walking a DFA for a given haystack,
195	/// and then finishing the search with the final EOI transition.
196	///
197	/// ```
198	/// use regex_automata::dfa::{Automaton, dense};
199	///
200	/// let dfa = dense::DFA::new(r"[a-z]+r")?;
201	/// let haystack = "bar".as_bytes();
202	///
203	/// // The start state is determined by inspecting the position and the
204	/// // initial bytes of the haystack.
205	/// let mut state = dfa.start_state_forward(
206	/// None, haystack, `0`, haystack.len(),
207	/// );
208	/// // Walk all the bytes in the haystack.
209	/// for &b in haystack {
210	/// state = dfa.next_state(state, b);
211	/// }
212	/// // Matches are always delayed by 1 byte, so we must explicitly walk
213	/// // the special "EOI" transition at the end of the search. Without this
214	/// // final transition, the assert below will fail since the DFA will not
215	/// // have entered a match state yet!
216	/// state = dfa.next_eoi_state(state);
217	/// assert!(dfa.is_match_state(state));
218	///
219	/// # Ok::<(), Box<dyn std::error::Error>>(())
220	/// ```
221	fn next_eoi_state(&self, current: StateID) -> StateID;
222
223	/// Return the ID of the start state for this DFA when executing a forward
224	/// search.
225	///
226	/// Unlike typical DFA implementations, the start state for DFAs in this
227	/// crate is dependent on a few different factors:
228	///
229	/// The pattern ID, if present. When the underlying DFA has been compiled*
230	/// with multiple patterns _and_ the DFA has been configured to compile
231	/// an anchored start state for each pattern, then a pattern ID may be
232	/// specified to execute an anchored search for that specific pattern.
233	/// If `pattern_id` is invalid or if the DFA doesn't have start states
234	/// compiled for each pattern, then implementations must panic. DFAs in
235	/// this crate can be configured to compile start states for each pattern
236	/// via
237	/// [`dense::Config::starts_for_each_pattern`](crate::dfa::dense::Config::starts_for_each_pattern).
238	/// When `start > 0`, the byte at index `start - 1` may influence the*
239	/// start state if the regex uses `^` or `\b`.
240	/// Similarly, when `start == 0`, it may influence the start state when*
241	/// the regex uses `^` or `\A`.
242	/// Currently, `end` is unused.*
243	/// Whether the search is a forward or reverse search. This routine can*
244	/// only be used for forward searches.
245	///
246	/// # Panics
247	///
248	/// Implementations must panic if `start..end` is not a valid sub-slice of
249	/// `bytes`. Implementations must also panic if `pattern_id` is non-None
250	/// and does not refer to a valid pattern, or if the DFA was not compiled
251	/// with anchored start states for each pattern.
252	fn start_state_forward(
253	&self,
254	pattern_id: Option<PatternID>,
255	bytes: &[u8],
256	start: usize,
257	end: usize,
258	) -> StateID;
259
260	/// Return the ID of the start state for this DFA when executing a reverse
261	/// search.
262	///
263	/// Unlike typical DFA implementations, the start state for DFAs in this
264	/// crate is dependent on a few different factors:
265	///
266	/// The pattern ID, if present. When the underlying DFA has been compiled*
267	/// with multiple patterns _and_ the DFA has been configured to compile an
268	/// anchored start state for each pattern, then a pattern ID may be
269	/// specified to execute an anchored search for that specific pattern. If
270	/// `pattern_id` is invalid or if the DFA doesn't have start states compiled
271	/// for each pattern, then implementations must panic. DFAs in this crate
272	/// can be configured to compile start states for each pattern via
273	/// [`dense::Config::starts_for_each_pattern`](crate::dfa::dense::Config::starts_for_each_pattern).
274	/// When `end < bytes.len()`, the byte at index `end` may influence the*
275	/// start state if the regex uses `$` or `\b`.
276	/// Similarly, when `end == bytes.len()`, it may influence the start*
277	/// state when the regex uses `$` or `\z`.
278	/// Currently, `start` is unused.*
279	/// Whether the search is a forward or reverse search. This routine can*
280	/// only be used for reverse searches.
281	///
282	/// # Panics
283	///
284	/// Implementations must panic if `start..end` is not a valid sub-slice of
285	/// `bytes`. Implementations must also panic if `pattern_id` is non-None
286	/// and does not refer to a valid pattern, or if the DFA was not compiled
287	/// with anchored start states for each pattern.
288	fn start_state_reverse(
289	&self,
290	pattern_id: Option<PatternID>,
291	bytes: &[u8],
292	start: usize,
293	end: usize,
294	) -> StateID;
295
296	/// Returns true if and only if the given identifier corresponds to a
297	/// "special" state. A special state is one or more of the following:
298	/// a dead state, a quit state, a match state, a start state or an
299	/// accelerated state.
300	///
301	/// A correct implementation _may_ always return false for states that
302	/// are either start states or accelerated states, since that information
303	/// is only intended to be used for optimization purposes. Correct
304	/// implementations must return true if the state is a dead, quit or match
305	/// state. This is because search routines using this trait must be able
306	/// to rely on `is_special_state` as an indicator that a state may need
307	/// special treatment. (For example, when a search routine sees a dead
308	/// state, it must terminate.)
309	///
310	/// This routine permits search implementations to use a single branch to
311	/// check whether a state needs special attention before executing the next
312	/// transition. The example below shows how to do this.
313	///
314	/// # Example
315	///
316	/// This example shows how `is_special_state` can be used to implement a
317	/// correct search routine with minimal branching. In particular, this
318	/// search routine implements "leftmost" matching, which means that it
319	/// doesn't immediately stop once a match is found. Instead, it continues
320	/// until it reaches a dead state.
321	///
322	/// ```
323	/// use regex_automata::{
324	/// dfa::{Automaton, dense},
325	/// HalfMatch, MatchError, PatternID,
326	/// };
327	///
328	/// fn find_leftmost_first<A: Automaton>(
329	/// dfa: &A,
330	/// haystack: &[u8],
331	/// ) -> Result<Option<HalfMatch>, MatchError> {
332	/// // The start state is determined by inspecting the position and the
333	/// // initial bytes of the haystack. Note that start states can never
334	/// // be match states (since DFAs in this crate delay matches by 1
335	/// // byte), so we don't need to check if the start state is a match.
336	/// let mut state = dfa.start_state_forward(
337	/// None, haystack, `0`, haystack.len(),
338	/// );
339	/// let mut last_match = None;
340	/// // Walk all the bytes in the haystack. We can quit early if we see
341	/// // a dead or a quit state. The former means the automaton will
342	/// // never transition to any other state. The latter means that the
343	/// // automaton entered a condition in which its search failed.
344	/// for (i, &b) in haystack.iter().enumerate() {
345	/// state = dfa.next_state(state, b);
346	/// if dfa.is_special_state(state) {
347	/// if dfa.is_match_state(state) {
348	/// last_match = Some(HalfMatch::new(
349	/// dfa.match_pattern(state, `0`),
350	/// i,
351	/// ));
352	/// } else if dfa.is_dead_state(state) {
353	/// return Ok(last_match);
354	/// } else if dfa.is_quit_state(state) {
355	/// // It is possible to enter into a quit state after
356	/// // observing a match has occurred. In that case, we
357	/// // should return the match instead of an error.
358	/// if last_match.is_some() {
359	/// return Ok(last_match);
360	/// }
361	/// return Err(MatchError::Quit { byte: b, offset: i });
362	/// }
363	/// // Implementors may also want to check for start or accel
364	/// // states and handle them differently for performance
365	/// // reasons. But it is not necessary for correctness.
366	/// }
367	/// }
368	/// // Matches are always delayed by 1 byte, so we must explicitly walk
369	/// // the special "EOI" transition at the end of the search.
370	/// state = dfa.next_eoi_state(state);
371	/// if dfa.is_match_state(state) {
372	/// last_match = Some(HalfMatch::new(
373	/// dfa.match_pattern(state, `0`),
374	/// haystack.len(),
375	/// ));
376	/// }
377	/// Ok(last_match)
378	/// }
379	///
380	/// // We use a greedy '+' operator to show how the search doesn't just
381	/// // stop once a match is detected. It continues extending the match.
382	/// // Using '[a-z]+?' would also work as expected and stop the search
383	/// // early. Greediness is built into the automaton.
384	/// let dfa = dense::DFA::new(r"[a-z]+")?;
385	/// let haystack = "123 foobar 4567".as_bytes();
386	/// let mat = find_leftmost_first(&dfa, haystack)?.unwrap();
387	/// assert_eq!(mat.pattern().as_usize(), `0`);
388	/// assert_eq!(mat.offset(), `10`);
389	///
390	/// // Here's another example that tests our handling of the special EOI
391	/// // transition. This will fail to find a match if we don't call
392	/// // 'next_eoi_state' at the end of the search since the match isn't
393	/// // found until the final byte in the haystack.
394	/// let dfa = dense::DFA::new(r"[0-9]{4}")?;
395	/// let haystack = "123 foobar 4567".as_bytes();
396	/// let mat = find_leftmost_first(&dfa, haystack)?.unwrap();
397	/// assert_eq!(mat.pattern().as_usize(), `0`);
398	/// assert_eq!(mat.offset(), `15`);
399	///
400	/// // And note that our search implementation above automatically works
401	/// // with multi-DFAs. Namely, `dfa.match_pattern(match_state, 0)` selects
402	/// // the appropriate pattern ID for us.
403	/// let dfa = dense::DFA::new_many(&[r"[a-z]+", r"[0-9]+"])?;
404	/// let haystack = "123 foobar 4567".as_bytes();
405	/// let mat = find_leftmost_first(&dfa, haystack)?.unwrap();
406	/// assert_eq!(mat.pattern().as_usize(), `1`);
407	/// assert_eq!(mat.offset(), `3`);
408	/// let mat = find_leftmost_first(&dfa, &haystack[`3`..])?.unwrap();
409	/// assert_eq!(mat.pattern().as_usize(), `0`);
410	/// assert_eq!(mat.offset(), `7`);
411	/// let mat = find_leftmost_first(&dfa, &haystack[`10`..])?.unwrap();
412	/// assert_eq!(mat.pattern().as_usize(), `1`);
413	/// assert_eq!(mat.offset(), `5`);
414	///
415	/// # Ok::<(), Box<dyn std::error::Error>>(())
416	/// ```
417	fn is_special_state(&self, id: StateID) -> bool;
418
419	/// Returns true if and only if the given identifier corresponds to a dead
420	/// state. When a DFA enters a dead state, it is impossible to leave. That
421	/// is, every transition on a dead state by definition leads back to the
422	/// same dead state.
423	///
424	/// In practice, the dead state always corresponds to the identifier `0`.
425	/// Moreover, in practice, there is only one dead state.
426	///
427	/// The existence of a dead state is not strictly required in the classical
428	/// model of finite state machines, where one generally only cares about
429	/// the question of whether an input sequence matches or not. Dead states
430	/// are not needed to answer that question, since one can immediately quit
431	/// as soon as one enters a final or "match" state. However, we don't just
432	/// care about matches but also care about the location of matches, and
433	/// more specifically, care about semantics like "greedy" matching.
434	///
435	/// For example, given the pattern `a+` and the input `aaaz`, the dead
436	/// state won't be entered until the state machine reaches `z` in the
437	/// input, at which point, the search routine can quit. But without the
438	/// dead state, the search routine wouldn't know when to quit. In a
439	/// classical representation, the search routine would stop after seeing
440	/// the first `a` (which is when the search would enter a match state). But
441	/// this wouldn't implement "greedy" matching where `a+` matches as many
442	/// `a`'s as possible.
443	///
444	/// # Example
445	///
446	/// See the example for [`Automaton::is_special_state`] for how to use this
447	/// method correctly.
448	fn is_dead_state(&self, id: StateID) -> bool;
449
450	/// Returns true if and only if the given identifier corresponds to a quit
451	/// state. A quit state is like a dead state (it has no transitions other
452	/// than to itself), except it indicates that the DFA failed to complete
453	/// the search. When this occurs, callers can neither accept or reject that
454	/// a match occurred.
455	///
456	/// In practice, the quit state always corresponds to the state immediately
457	/// following the dead state. (Which is not usually represented by `1`,
458	/// since state identifiers are pre-multiplied by the state machine's
459	/// alphabet stride, and the alphabet stride varies between DFAs.)
460	///
461	/// By default, state machines created by this crate will never enter a
462	/// quit state. Since entering a quit state is the only way for a DFA
463	/// in this crate to fail at search time, it follows that the default
464	/// configuration can never produce a match error. Nevertheless, handling
465	/// quit states is necessary to correctly support all configurations in
466	/// this crate.
467	///
468	/// The typical way in which a quit state can occur is when heuristic
469	/// support for Unicode word boundaries is enabled via the
470	/// [`dense::Config::unicode_word_boundary`](crate::dfa::dense::Config::unicode_word_boundary)
471	/// option. But other options, like the lower level
472	/// [`dense::Config::quit`](crate::dfa::dense::Config::quit)
473	/// configuration, can also result in a quit state being entered. The
474	/// purpose of the quit state is to provide a way to execute a fast DFA
475	/// in common cases while delegating to slower routines when the DFA quits.
476	///
477	/// The default search implementations provided by this crate will return
478	/// a [`MatchError::Quit`](crate::MatchError::Quit) error when a quit state
479	/// is entered.
480	///
481	/// # Example
482	///
483	/// See the example for [`Automaton::is_special_state`] for how to use this
484	/// method correctly.
485	fn is_quit_state(&self, id: StateID) -> bool;
486
487	/// Returns true if and only if the given identifier corresponds to a
488	/// match state. A match state is also referred to as a "final" state and
489	/// indicates that a match has been found.
490	///
491	/// If all you care about is whether a particular pattern matches in the
492	/// input sequence, then a search routine can quit early as soon as the
493	/// machine enters a match state. However, if you're looking for the
494	/// standard "leftmost-first" match location, then search _must_ continue
495	/// until either the end of the input or until the machine enters a dead
496	/// state. (Since either condition implies that no other useful work can
497	/// be done.) Namely, when looking for the location of a match, then
498	/// search implementations should record the most recent location in
499	/// which a match state was entered, but otherwise continue executing the
500	/// search as normal. (The search may even leave the match state.) Once
501	/// the termination condition is reached, the most recently recorded match
502	/// location should be returned.
503	///
504	/// Finally, one additional power given to match states in this crate
505	/// is that they are always associated with a specific pattern in order
506	/// to support multi-DFAs. See [`Automaton::match_pattern`] for more
507	/// details and an example for how to query the pattern associated with a
508	/// particular match state.
509	///
510	/// # Example
511	///
512	/// See the example for [`Automaton::is_special_state`] for how to use this
513	/// method correctly.
514	fn is_match_state(&self, id: StateID) -> bool;
515
516	/// Returns true if and only if the given identifier corresponds to a
517	/// start state. A start state is a state in which a DFA begins a search.
518	/// All searches begin in a start state. Moreover, since all matches are
519	/// delayed by one byte, a start state can never be a match state.
520	///
521	/// The main role of a start state is, as mentioned, to be a starting
522	/// point for a DFA. This starting point is determined via one of
523	/// [`Automaton::start_state_forward`] or
524	/// [`Automaton::start_state_reverse`], depending on whether one is doing
525	/// a forward or a reverse search, respectively.
526	///
527	/// A secondary use of start states is for prefix acceleration. Namely,
528	/// while executing a search, if one detects that you're in a start state,
529	/// then it may be faster to look for the next match of a prefix of the
530	/// pattern, if one exists. If a prefix exists and since all matches must
531	/// begin with that prefix, then skipping ahead to occurrences of that
532	/// prefix may be much faster than executing the DFA.
533	///
534	/// # Example
535	///
536	/// This example shows how to implement your own search routine that does
537	/// a prefix search whenever the search enters a start state.
538	///
539	/// Note that you do not need to implement your own search routine to
540	/// make use of prefilters like this. The search routines provided
541	/// by this crate already implement prefilter support via the
542	/// [`Prefilter`](crate::util::prefilter::Prefilter) trait. The various
543	/// `find__at` routines on this trait support the `Prefilter` trait*
544	/// through [`Scanner`](crate::util::prefilter::Scanner)s. This example is
545	/// meant to show how you might deal with prefilters in a simplified case
546	/// if you are implementing your own search routine.
547	///
548	/// ```
549	/// use regex_automata::{
550	/// MatchError, PatternID,
551	/// dfa::{Automaton, dense},
552	/// HalfMatch,
553	/// };
554	///
555	/// fn find_byte(slice: &[u8], at: usize, byte: u8) -> Option<usize> {
556	/// // Would be faster to use the memchr crate, but this is still
557	/// // faster than running through the DFA.
558	/// slice[at..].iter().position(\|&b\| b == byte).map(\|i\| at + i)
559	/// }
560	///
561	/// fn find_leftmost_first<A: Automaton>(
562	/// dfa: &A,
563	/// haystack: &[u8],
564	/// prefix_byte: Option<u8>,
565	/// ) -> Result<Option<HalfMatch>, MatchError> {
566	/// // See the Automaton::is_special_state example for similar code
567	/// // with more comments.
568	///
569	/// let mut state = dfa.start_state_forward(
570	/// None, haystack, `0`, haystack.len(),
571	/// );
572	/// let mut last_match = None;
573	/// let mut pos = `0`;
574	/// while pos < haystack.len() {
575	/// let b = haystack[pos];
576	/// state = dfa.next_state(state, b);
577	/// pos += `1`;
578	/// if dfa.is_special_state(state) {
579	/// if dfa.is_match_state(state) {
580	/// last_match = Some(HalfMatch::new(
581	/// dfa.match_pattern(state, `0`),
582	/// pos - `1`,
583	/// ));
584	/// } else if dfa.is_dead_state(state) {
585	/// return Ok(last_match);
586	/// } else if dfa.is_quit_state(state) {
587	/// // It is possible to enter into a quit state after
588	/// // observing a match has occurred. In that case, we
589	/// // should return the match instead of an error.
590	/// if last_match.is_some() {
591	/// return Ok(last_match);
592	/// }
593	/// return Err(MatchError::Quit {
594	/// byte: b, offset: pos - `1`,
595	/// });
596	/// } else if dfa.is_start_state(state) {
597	/// // If we're in a start state and know all matches begin
598	/// // with a particular byte, then we can quickly skip to
599	/// // candidate matches without running the DFA through
600	/// // every byte inbetween.
601	/// if let Some(prefix_byte) = prefix_byte {
602	/// pos = match find_byte(haystack, pos, prefix_byte) {
603	/// Some(pos) => pos,
604	/// None => break,
605	/// };
606	/// }
607	/// }
608	/// }
609	/// }
610	/// // Matches are always delayed by 1 byte, so we must explicitly walk
611	/// // the special "EOI" transition at the end of the search.
612	/// state = dfa.next_eoi_state(state);
613	/// if dfa.is_match_state(state) {
614	/// last_match = Some(HalfMatch::new(
615	/// dfa.match_pattern(state, `0`),
616	/// haystack.len(),
617	/// ));
618	/// }
619	/// Ok(last_match)
620	/// }
621	///
622	/// // In this example, it's obvious that all occurrences of our pattern
623	/// // begin with 'Z', so we pass in 'Z'.
624	/// let dfa = dense::DFA::new(r"Z[a-z]+")?;
625	/// let haystack = "123 foobar Zbaz quux".as_bytes();
626	/// let mat = find_leftmost_first(&dfa, haystack, Some(b'Z'))?.unwrap();
627	/// assert_eq!(mat.pattern().as_usize(), `0`);
628	/// assert_eq!(mat.offset(), `15`);
629	///
630	/// // But note that we don't need to pass in a prefix byte. If we don't,
631	/// // then the search routine does no acceleration.
632	/// let mat = find_leftmost_first(&dfa, haystack, None)?.unwrap();
633	/// assert_eq!(mat.pattern().as_usize(), `0`);
634	/// assert_eq!(mat.offset(), `15`);
635	///
636	/// // However, if we pass an incorrect byte, then the prefix search will
637	/// // result in incorrect results.
638	/// assert_eq!(find_leftmost_first(&dfa, haystack, Some(b'X'))?, None);
639	///
640	/// # Ok::<(), Box<dyn std::error::Error>>(())
641	/// ```
642	fn is_start_state(&self, id: StateID) -> bool;
643
644	/// Returns true if and only if the given identifier corresponds to an
645	/// accelerated state.
646	///
647	/// An accelerated state is a special optimization
648	/// trick implemented by this crate. Namely, if
649	/// [`dense::Config::accelerate`](crate::dfa::dense::Config::accelerate) is
650	/// enabled (and it is by default), then DFAs generated by this crate will
651	/// tag states meeting certain characteristics as accelerated. States meet
652	/// this criteria whenever most of their transitions are self-transitions.
653	/// That is, transitions that loop back to the same state. When a small
654	/// number of transitions aren't self-transitions, then it follows that
655	/// there are only a small number of bytes that can cause the DFA to leave
656	/// that state. Thus, there is an opportunity to look for those bytes
657	/// using more optimized routines rather than continuing to run through
658	/// the DFA. This trick is similar to the prefilter idea described in
659	/// the documentation of [`Automaton::is_start_state`] with two main
660	/// differences:
661	///
662	/// 1. It is more limited since acceleration only applies to single bytes.
663	/// This means states are rarely accelerated when Unicode mode is enabled
664	/// (which is enabled by default).
665	/// 2. It can occur anywhere in the DFA, which increases optimization
666	/// opportunities.
667	///
668	/// Like the prefilter idea, the main downside (and a possible reason to
669	/// disable it) is that it can lead to worse performance in some cases.
670	/// Namely, if a state is accelerated for very common bytes, then the
671	/// overhead of checking for acceleration and using the more optimized
672	/// routines to look for those bytes can cause overall performance to be
673	/// worse than if acceleration wasn't enabled at all.
674	///
675	/// A simple example of a regex that has an accelerated state is
676	/// `(?-u)[^a]+a`. Namely, the `[^a]+` sub-expression gets compiled down
677	/// into a single state where all transitions except for `a` loop back to
678	/// itself, and where `a` is the only transition (other than the special
679	/// EOI transition) that goes to some other state. Thus, this state can
680	/// be accelerated and implemented more efficiently by calling an
681	/// optimized routine like `memchr` with `a` as the needle. Notice that
682	/// the `(?-u)` to disable Unicode is necessary here, as without it,
683	/// `[^a]` will match any UTF-8 encoding of any Unicode scalar value other
684	/// than `a`. This more complicated expression compiles down to many DFA
685	/// states and the simple acceleration optimization is no longer available.
686	///
687	/// Typically, this routine is used to guard calls to
688	/// [`Automaton::accelerator`], which returns the accelerated bytes for
689	/// the specified state.
690	fn is_accel_state(&self, id: StateID) -> bool;
691
692	/// Returns the total number of patterns compiled into this DFA.
693	///
694	/// In the case of a DFA that contains no patterns, this must return `0`.
695	///
696	/// # Example
697	///
698	/// This example shows the pattern count for a DFA that never matches:
699	///
700	/// ```
701	/// use regex_automata::dfa::{Automaton, dense::DFA};
702	///
703	/// let dfa: DFA<Vec<u32>> = DFA::never_match()?;
704	/// assert_eq!(dfa.pattern_count(), `0`);
705	/// # Ok::<(), Box<dyn std::error::Error>>(())
706	/// ```
707	///
708	/// And another example for a DFA that matches at every position:
709	///
710	/// ```
711	/// use regex_automata::dfa::{Automaton, dense::DFA};
712	///
713	/// let dfa: DFA<Vec<u32>> = DFA::always_match()?;
714	/// assert_eq!(dfa.pattern_count(), `1`);
715	/// # Ok::<(), Box<dyn std::error::Error>>(())
716	/// ```
717	///
718	/// And finally, a DFA that was constructed from multiple patterns:
719	///
720	/// ```
721	/// use regex_automata::dfa::{Automaton, dense::DFA};
722	///
723	/// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
724	/// assert_eq!(dfa.pattern_count(), `3`);
725	/// # Ok::<(), Box<dyn std::error::Error>>(())
726	/// ```
727	fn pattern_count(&self) -> usize;
728
729	/// Returns the total number of patterns that match in this state.
730	///
731	/// If the given state is not a match state, then implementations may
732	/// panic.
733	///
734	/// If the DFA was compiled with one pattern, then this must necessarily
735	/// always return `1` for all match states.
736	///
737	/// Implementations must guarantee that [`Automaton::match_pattern`] can
738	/// be called with indices up to (but not including) the count returned by
739	/// this routine without panicking.
740	///
741	/// # Panics
742	///
743	/// Implementations are permitted to panic if the provided state ID does
744	/// not correspond to a match state.
745	///
746	/// # Example
747	///
748	/// This example shows a simple instance of implementing overlapping
749	/// matches. In particular, it shows not only how to determine how many
750	/// patterns have matched in a particular state, but also how to access
751	/// which specific patterns have matched.
752	///
753	/// Notice that we must use [`MatchKind::All`](crate::MatchKind::All)
754	/// when building the DFA. If we used
755	/// [`MatchKind::LeftmostFirst`](crate::MatchKind::LeftmostFirst)
756	/// instead, then the DFA would not be constructed in a way that supports
757	/// overlapping matches. (It would only report a single pattern that
758	/// matches at any particular point in time.)
759	///
760	/// Another thing to take note of is the patterns used and the order in
761	/// which the pattern IDs are reported. In the example below, pattern `3`
762	/// is yielded first. Why? Because it corresponds to the match that
763	/// appears first. Namely, the `@` symbol is part of `\S+` but not part
764	/// of any of the other patterns. Since the `\S+` pattern has a match that
765	/// starts to the left of any other pattern, its ID is returned before any
766	/// other.
767	///
768	/// ```
769	/// use regex_automata::{
770	/// dfa::{Automaton, dense},
771	/// MatchKind,
772	/// };
773	///
774	/// let dfa = dense::Builder::new()
775	/// .configure(dense::Config::new().match_kind(MatchKind::All))
776	/// .build_many(&[
777	/// r"\w+", r"[a-z]+", r"[A-Z]+", r"\S+",
778	/// ])?;
779	/// let haystack = "@bar".as_bytes();
780	///
781	/// // The start state is determined by inspecting the position and the
782	/// // initial bytes of the haystack.
783	/// let mut state = dfa.start_state_forward(
784	/// None, haystack, `0`, haystack.len(),
785	/// );
786	/// // Walk all the bytes in the haystack.
787	/// for &b in haystack {
788	/// state = dfa.next_state(state, b);
789	/// }
790	/// state = dfa.next_eoi_state(state);
791	///
792	/// assert!(dfa.is_match_state(state));
793	/// assert_eq!(dfa.match_count(state), `3`);
794	/// // The following calls are guaranteed to not panic since `match_count`
795	/// // returned `3` above.
796	/// assert_eq!(dfa.match_pattern(state, `0`).as_usize(), `3`);
797	/// assert_eq!(dfa.match_pattern(state, `1`).as_usize(), `0`);
798	/// assert_eq!(dfa.match_pattern(state, `2`).as_usize(), `1`);
799	///
800	/// # Ok::<(), Box<dyn std::error::Error>>(())
801	/// ```
802	fn match_count(&self, id: StateID) -> usize;
803
804	/// Returns the pattern ID corresponding to the given match index in the
805	/// given state.
806	///
807	/// See [`Automaton::match_count`] for an example of how to use this
808	/// method correctly. Note that if you know your DFA is compiled with a
809	/// single pattern, then this routine is never necessary since it will
810	/// always return a pattern ID of `0` for an index of `0` when `id`
811	/// corresponds to a match state.
812	///
813	/// Typically, this routine is used when implementing an overlapping
814	/// search, as the example for `Automaton::match_count` does.
815	///
816	/// # Panics
817	///
818	/// If the state ID is not a match state or if the match index is out
819	/// of bounds for the given state, then this routine may either panic
820	/// or produce an incorrect result. If the state ID is correct and the
821	/// match index is correct, then this routine must always produce a valid
822	/// `PatternID`.
823	fn match_pattern(&self, id: StateID, index: usize) -> PatternID;
824
825	/// Return a slice of bytes to accelerate for the given state, if possible.
826	///
827	/// If the given state has no accelerator, then an empty slice must be
828	/// returned. If `Automaton::is_accel_state` returns true for the given
829	/// ID, then this routine _must_ return a non-empty slice, but it is not
830	/// required to do so.
831	///
832	/// If the given ID is not a valid state ID for this automaton, then
833	/// implementations may panic or produce incorrect results.
834	///
835	/// See [`Automaton::is_accel_state`] for more details on state
836	/// acceleration.
837	///
838	/// By default, this method will always return an empty slice.
839	///
840	/// # Example
841	///
842	/// This example shows a contrived case in which we build a regex that we
843	/// know is accelerated and extract the accelerator from a state.
844	///
845	/// ```
846	/// use regex_automata::{
847	/// nfa::thompson,
848	/// dfa::{Automaton, dense},
849	/// util::id::StateID,
850	/// SyntaxConfig,
851	/// };
852	///
853	/// let dfa = dense::Builder::new()
854	/// // We disable Unicode everywhere and permit the regex to match
855	/// // invalid UTF-8. e.g., `[^abc]` matches `\xFF`, which is not valid
856	/// // UTF-8.
857	/// .syntax(SyntaxConfig::new().unicode(`false`).utf8(`false`))
858	/// // This makes the implicit `(?s:.)?` prefix added to the regex*
859	/// // match through arbitrary bytes instead of being UTF-8 aware. This
860	/// // isn't necessary to get acceleration to work in this case, but
861	/// // it does make the DFA substantially simpler.
862	/// .thompson(thompson::Config::new().utf8(`false`))
863	/// .build("[^abc]+a")?;
864	///
865	/// // Here we just pluck out the state that we know is accelerated.
866	/// // While the stride calculations are something that can be relied
867	/// // on by callers, the specific position of the accelerated state is
868	/// // implementation defined.
869	/// //
870	/// // N.B. We get '3' by inspecting the state machine using 'regex-cli'.
871	/// // e.g., try `regex-cli debug dfa dense '[^abc]+a' -BbUC`.
872	/// let id = StateID::new(`3` * dfa.stride()).unwrap();
873	/// let accelerator = dfa.accelerator(id);
874	/// // The `[^abc]+` sub-expression permits [a, b, c] to be accelerated.
875	/// assert_eq!(accelerator, &[b'a', b'b', b'c']);
876	/// # Ok::<(), Box<dyn std::error::Error>>(())
877	/// ```
878	fn accelerator(&self, _id: StateID) -> &[u8] {
879	&[]
880	}
881
882	/// Executes a forward search and returns the end position of the first
883	/// match that is found as early as possible. If no match exists, then
884	/// `None` is returned.
885	///
886	/// This routine stops scanning input as soon as the search observes a
887	/// match state. This is useful for implementing boolean `is_match`-like
888	/// routines, where as little work is done as possible.
889	///
890	/// See [`Automaton::find_earliest_fwd_at`] for additional functionality,
891	/// such as providing a prefilter, a specific pattern to match and the
892	/// bounds of the search within the haystack. This routine is meant as
893	/// a convenience for common cases where the additional functionality is
894	/// not needed.
895	///
896	/// # Errors
897	///
898	/// This routine only errors if the search could not complete. For
899	/// DFAs generated by this crate, this only occurs in a non-default
900	/// configuration where quit bytes are used or Unicode word boundaries are
901	/// heuristically enabled.
902	///
903	/// When a search cannot complete, callers cannot know whether a match
904	/// exists or not.
905	///
906	/// # Example
907	///
908	/// This example shows how to use this method with a
909	/// [`dense::DFA`](crate::dfa::dense::DFA). In particular, it demonstrates
910	/// how the position returned might differ from what one might expect when
911	/// executing a traditional leftmost search.
912	///
913	/// ```
914	/// use regex_automata::{
915	/// dfa::{Automaton, dense},
916	/// HalfMatch,
917	/// };
918	///
919	/// let dfa = dense::DFA::new("foo[0-9]+")?;
920	/// // Normally, the end of the leftmost first match here would be 8,
921	/// // corresponding to the end of the input. But the "earliest" semantics
922	/// // this routine cause it to stop as soon as a match is known, which
923	/// // occurs once 'foo[0-9]' has matched.
924	/// let expected = HalfMatch::must(`0`, `4`);
925	/// assert_eq!(Some(expected), dfa.find_earliest_fwd(b"foo12345")?);
926	///
927	/// let dfa = dense::DFA::new("abc\|a")?;
928	/// // Normally, the end of the leftmost first match here would be 3,
929	/// // but the shortest match semantics detect a match earlier.
930	/// let expected = HalfMatch::must(`0`, `1`);
931	/// assert_eq!(Some(expected), dfa.find_earliest_fwd(b"abc")?);
932	/// # Ok::<(), Box<dyn std::error::Error>>(())
933	/// ```
934	#[inline]
935	fn find_earliest_fwd(
936	&self,
937	bytes: &[u8],
938	) -> Result<Option<HalfMatch>, MatchError> {
939	self.find_earliest_fwd_at(None, None, bytes, `0`, bytes.len())
940	}
941
942	/// Executes a reverse search and returns the start position of the first
943	/// match that is found as early as possible. If no match exists, then
944	/// `None` is returned.
945	///
946	/// This routine stops scanning input as soon as the search observes a
947	/// match state.
948	///
949	/// Note that while it is not technically necessary to build a reverse
950	/// automaton to use a reverse search, it is likely that you'll want to do
951	/// so. Namely, the typical use of a reverse search is to find the starting
952	/// location of a match once its end is discovered from a forward search. A
953	/// reverse DFA automaton can be built by configuring the intermediate NFA
954	/// to be reversed via
955	/// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse).
956	///
957	/// # Errors
958	///
959	/// This routine only errors if the search could not complete. For
960	/// DFAs generated by this crate, this only occurs in a non-default
961	/// configuration where quit bytes are used or Unicode word boundaries are
962	/// heuristically enabled.
963	///
964	/// When a search cannot complete, callers cannot know whether a match
965	/// exists or not.
966	///
967	/// # Example
968	///
969	/// This example shows how to use this method with a
970	/// [`dense::DFA`](crate::dfa::dense::DFA). In particular, it demonstrates
971	/// how the position returned might differ from what one might expect when
972	/// executing a traditional leftmost reverse search.
973	///
974	/// ```
975	/// use regex_automata::{
976	/// nfa::thompson,
977	/// dfa::{Automaton, dense},
978	/// HalfMatch,
979	/// };
980	///
981	/// let dfa = dense::Builder::new()
982	/// .thompson(thompson::Config::new().reverse(`true`))
983	/// .build("[a-z]+[0-9]+")?;
984	/// // Normally, the end of the leftmost first match here would be 0,
985	/// // corresponding to the beginning of the input. But the "earliest"
986	/// // semantics of this routine cause it to stop as soon as a match is
987	/// // known, which occurs once '[a-z][0-9]+' has matched.
988	/// let expected = HalfMatch::must(`0`, `2`);
989	/// assert_eq!(Some(expected), dfa.find_earliest_rev(b"foo12345")?);
990	///
991	/// let dfa = dense::Builder::new()
992	/// .thompson(thompson::Config::new().reverse(`true`))
993	/// .build("abc\|c")?;
994	/// // Normally, the end of the leftmost first match here would be 0,
995	/// // but the shortest match semantics detect a match earlier.
996	/// let expected = HalfMatch::must(`0`, `2`);
997	/// assert_eq!(Some(expected), dfa.find_earliest_rev(b"abc")?);
998	/// # Ok::<(), Box<dyn std::error::Error>>(())
999	/// ```
1000	#[inline]
1001	fn find_earliest_rev(
1002	&self,
1003	bytes: &[u8],
1004	) -> Result<Option<HalfMatch>, MatchError> {
1005	self.find_earliest_rev_at(None, bytes, `0`, bytes.len())
1006	}
1007
1008	/// Executes a forward search and returns the end position of the leftmost
1009	/// match that is found. If no match exists, then `None` is returned.
1010	///
1011	/// # Errors
1012	///
1013	/// This routine only errors if the search could not complete. For
1014	/// DFAs generated by this crate, this only occurs in a non-default
1015	/// configuration where quit bytes are used or Unicode word boundaries are
1016	/// heuristically enabled.
1017	///
1018	/// When a search cannot complete, callers cannot know whether a match
1019	/// exists or not.
1020	///
1021	/// # Notes for implementors
1022	///
1023	/// Implementors of this trait are not required to implement any particular
1024	/// match semantics (such as leftmost-first), which are instead manifest in
1025	/// the DFA's transitions.
1026	///
1027	/// In particular, this method must continue searching even after it enters
1028	/// a match state. The search should only terminate once it has reached
1029	/// the end of the input or when it has entered a dead or quit state. Upon
1030	/// termination, the position of the last byte seen while still in a match
1031	/// state is returned.
1032	///
1033	/// Since this trait provides an implementation for this method by default,
1034	/// it's unlikely that one will need to implement this.
1035	///
1036	/// # Example
1037	///
1038	/// This example shows how to use this method with a
1039	/// [`dense::DFA`](crate::dfa::dense::DFA). By default, a dense DFA uses
1040	/// "leftmost first" match semantics.
1041	///
1042	/// Leftmost first match semantics corresponds to the match with the
1043	/// smallest starting offset, but where the end offset is determined by
1044	/// preferring earlier branches in the original regular expression. For
1045	/// example, `Sam\|Samwise` will match `Sam` in `Samwise`, but `Samwise\|Sam`
1046	/// will match `Samwise` in `Samwise`.
1047	///
1048	/// Generally speaking, the "leftmost first" match is how most backtracking
1049	/// regular expressions tend to work. This is in contrast to POSIX-style
1050	/// regular expressions that yield "leftmost longest" matches. Namely,
1051	/// both `Sam\|Samwise` and `Samwise\|Sam` match `Samwise` when using
1052	/// leftmost longest semantics. (This crate does not currently support
1053	/// leftmost longest semantics.)
1054	///
1055	/// ```
1056	/// use regex_automata::{
1057	/// dfa::{Automaton, dense},
1058	/// HalfMatch,
1059	/// };
1060	///
1061	/// let dfa = dense::DFA::new("foo[0-9]+")?;
1062	/// let expected = HalfMatch::must(`0`, `8`);
1063	/// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?);
1064	///
1065	/// // Even though a match is found after reading the first byte (`a`),
1066	/// // the leftmost first match semantics demand that we find the earliest
1067	/// // match that prefers earlier parts of the pattern over latter parts.
1068	/// let dfa = dense::DFA::new("abc\|a")?;
1069	/// let expected = HalfMatch::must(`0`, `3`);
1070	/// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"abc")?);
1071	///
1072	/// # Ok::<(), Box<dyn std::error::Error>>(())
1073	/// ```
1074	#[inline]
1075	fn find_leftmost_fwd(
1076	&self,
1077	bytes: &[u8],
1078	) -> Result<Option<HalfMatch>, MatchError> {
1079	self.find_leftmost_fwd_at(None, None, bytes, `0`, bytes.len())
1080	}
1081
1082	/// Executes a reverse search and returns the start of the position of the
1083	/// leftmost match that is found. If no match exists, then `None` is
1084	/// returned.
1085	///
1086	/// # Errors
1087	///
1088	/// This routine only errors if the search could not complete. For
1089	/// DFAs generated by this crate, this only occurs in a non-default
1090	/// configuration where quit bytes are used or Unicode word boundaries are
1091	/// heuristically enabled.
1092	///
1093	/// When a search cannot complete, callers cannot know whether a match
1094	/// exists or not.
1095	///
1096	/// # Notes for implementors
1097	///
1098	/// Implementors of this trait are not required to implement any particular
1099	/// match semantics (such as leftmost-first), which are instead manifest in
1100	/// the DFA's transitions.
1101	///
1102	/// In particular, this method must continue searching even after it enters
1103	/// a match state. The search should only terminate once it has reached
1104	/// the end of the input or when it has entered a dead or quit state. Upon
1105	/// termination, the position of the last byte seen while still in a match
1106	/// state is returned.
1107	///
1108	/// Since this trait provides an implementation for this method by default,
1109	/// it's unlikely that one will need to implement this.
1110	///
1111	/// # Example
1112	///
1113	/// This example shows how to use this method with a
1114	/// [`dense::DFA`](crate::dfa::dense::DFA). In particular, this routine
1115	/// is principally useful when used in conjunction with the
1116	/// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse)
1117	/// configuration. In general, it's unlikely to be correct to use both
1118	/// `find_leftmost_fwd` and `find_leftmost_rev` with the same DFA since any
1119	/// particular DFA will only support searching in one direction with
1120	/// respect to the pattern.
1121	///
1122	/// ```
1123	/// use regex_automata::{
1124	/// nfa::thompson,
1125	/// dfa::{Automaton, dense},
1126	/// HalfMatch,
1127	/// };
1128	///
1129	/// let dfa = dense::Builder::new()
1130	/// .thompson(thompson::Config::new().reverse(`true`))
1131	/// .build("foo[0-9]+")?;
1132	/// let expected = HalfMatch::must(`0`, `0`);
1133	/// assert_eq!(Some(expected), dfa.find_leftmost_rev(b"foo12345")?);
1134	///
1135	/// // Even though a match is found after reading the last byte (`c`),
1136	/// // the leftmost first match semantics demand that we find the earliest
1137	/// // match that prefers earlier parts of the pattern over latter parts.
1138	/// let dfa = dense::Builder::new()
1139	/// .thompson(thompson::Config::new().reverse(`true`))
1140	/// .build("abc\|c")?;
1141	/// let expected = HalfMatch::must(`0`, `0`);
1142	/// assert_eq!(Some(expected), dfa.find_leftmost_rev(b"abc")?);
1143	///
1144	/// # Ok::<(), Box<dyn std::error::Error>>(())
1145	/// ```
1146	#[inline]
1147	fn find_leftmost_rev(
1148	&self,
1149	bytes: &[u8],
1150	) -> Result<Option<HalfMatch>, MatchError> {
1151	self.find_leftmost_rev_at(None, bytes, `0`, bytes.len())
1152	}
1153
1154	/// Executes an overlapping forward search and returns the end position of
1155	/// matches as they are found. If no match exists, then `None` is returned.
1156	///
1157	/// This routine is principally only useful when searching for multiple
1158	/// patterns on inputs where multiple patterns may match the same regions
1159	/// of text. In particular, callers must preserve the automaton's search
1160	/// state from prior calls so that the implementation knows where the last
1161	/// match occurred.
1162	///
1163	/// # Errors
1164	///
1165	/// This routine only errors if the search could not complete. For
1166	/// DFAs generated by this crate, this only occurs in a non-default
1167	/// configuration where quit bytes are used or Unicode word boundaries are
1168	/// heuristically enabled.
1169	///
1170	/// When a search cannot complete, callers cannot know whether a match
1171	/// exists or not.
1172	///
1173	/// # Example
1174	///
1175	/// This example shows how to run a basic overlapping search with a
1176	/// [`dense::DFA`](crate::dfa::dense::DFA). Notice that we build the
1177	/// automaton with a `MatchKind::All` configuration. Overlapping searches
1178	/// are unlikely to work as one would expect when using the default
1179	/// `MatchKind::LeftmostFirst` match semantics, since leftmost-first
1180	/// matching is fundamentally incompatible with overlapping searches.
1181	/// Namely, overlapping searches need to report matches as they are seen,
1182	/// where as leftmost-first searches will continue searching even after a
1183	/// match has been observed in order to find the conventional end position
1184	/// of the match. More concretely, leftmost-first searches use dead states
1185	/// to terminate a search after a specific match can no longer be extended.
1186	/// Overlapping searches instead do the opposite by continuing the search
1187	/// to find totally new matches (potentially of other patterns).
1188	///
1189	/// ```
1190	/// use regex_automata::{
1191	/// dfa::{Automaton, OverlappingState, dense},
1192	/// HalfMatch,
1193	/// MatchKind,
1194	/// };
1195	///
1196	/// let dfa = dense::Builder::new()
1197	/// .configure(dense::Config::new().match_kind(MatchKind::All))
1198	/// .build_many(&[r"\w+$", r"\S+$"])?;
1199	/// let haystack = "@foo".as_bytes();
1200	/// let mut state = OverlappingState::start();
1201	///
1202	/// let expected = Some(HalfMatch::must(`1`, `4`));
1203	/// let got = dfa.find_overlapping_fwd(haystack, &mut state)?;
1204	/// assert_eq!(expected, got);
1205	///
1206	/// // The first pattern also matches at the same position, so re-running
1207	/// // the search will yield another match. Notice also that the first
1208	/// // pattern is returned after the second. This is because the second
1209	/// // pattern begins its match before the first, is therefore an earlier
1210	/// // match and is thus reported first.
1211	/// let expected = Some(HalfMatch::must(`0`, `4`));
1212	/// let got = dfa.find_overlapping_fwd(haystack, &mut state)?;
1213	/// assert_eq!(expected, got);
1214	///
1215	/// # Ok::<(), Box<dyn std::error::Error>>(())
1216	/// ```
1217	#[inline]
1218	fn find_overlapping_fwd(
1219	&self,
1220	bytes: &[u8],
1221	state: &mut OverlappingState,
1222	) -> Result<Option<HalfMatch>, MatchError> {
1223	self.find_overlapping_fwd_at(None, None, bytes, `0`, bytes.len(), state)
1224	}
1225
1226	/// Executes a forward search and returns the end position of the first
1227	/// match that is found as early as possible. If no match exists, then
1228	/// `None` is returned.
1229	///
1230	/// This routine stops scanning input as soon as the search observes a
1231	/// match state. This is useful for implementing boolean `is_match`-like
1232	/// routines, where as little work is done as possible.
1233	///
1234	/// This is like [`Automaton::find_earliest_fwd`], except it provides some
1235	/// additional control over how the search is executed:
1236	///
1237	/// `pre` is a prefilter scanner that, when given, is used whenever the*
1238	/// DFA enters its starting state. This is meant to speed up searches where
1239	/// one or a small number of literal prefixes are known.
1240	/// `pattern_id` specifies a specific pattern in the DFA to run an*
1241	/// anchored search for. If not given, then a search for any pattern is
1242	/// performed. For DFAs built by this crate,
1243	/// [`dense::Config::starts_for_each_pattern`](crate::dfa::dense::Config::starts_for_each_pattern)
1244	/// must be enabled to use this functionality.
1245	/// `start` and `end` permit searching a specific region of the haystack*
1246	/// `bytes`. This is useful when implementing an iterator over matches
1247	/// within the same haystack, which cannot be done correctly by simply
1248	/// providing a subslice of `bytes`. (Because the existence of look-around
1249	/// operations such as `\b`, `^` and `$` need to take the surrounding
1250	/// context into account. This cannot be done if the haystack doesn't
1251	/// contain it.)
1252	///
1253	/// The examples below demonstrate each of these additional parameters.
1254	///
1255	/// # Errors
1256	///
1257	/// This routine only errors if the search could not complete. For
1258	/// DFAs generated by this crate, this only occurs in a non-default
1259	/// configuration where quit bytes are used or Unicode word boundaries are
1260	/// heuristically enabled.
1261	///
1262	/// When a search cannot complete, callers cannot know whether a match
1263	/// exists or not.
1264	///
1265	/// # Panics
1266	///
1267	/// This routine must panic if a `pattern_id` is given and the underlying
1268	/// DFA does not support specific pattern searches.
1269	///
1270	/// It must also panic if the given haystack range is not valid.
1271	///
1272	/// # Example: prefilter
1273	///
1274	/// This example shows how to provide a prefilter for a pattern where all
1275	/// matches start with a `z` byte.
1276	///
1277	/// ```
1278	/// use regex_automata::{
1279	/// dfa::{Automaton, dense},
1280	/// util::prefilter::{Candidate, Prefilter, Scanner, State},
1281	/// HalfMatch,
1282	/// };
1283	///
1284	/// #[derive(Debug)]
1285	/// pub struct ZPrefilter;
1286	///
1287	/// impl Prefilter for ZPrefilter {
1288	/// fn next_candidate(
1289	/// &self,
1290	/// _: &mut State,
1291	/// haystack: &[u8],
1292	/// at: usize,
1293	/// ) -> Candidate {
1294	/// // Try changing b'z' to b'q' and observe this test fail since
1295	/// // the prefilter will skip right over the match.
1296	/// match haystack.iter().position(\|&b\| b == b'z') {
1297	/// None => Candidate::None,
1298	/// Some(i) => Candidate::PossibleStartOfMatch(at + i),
1299	/// }
1300	/// }
1301	///
1302	/// fn heap_bytes(&self) -> usize {
1303	/// `0`
1304	/// }
1305	/// }
1306	///
1307	/// let dfa = dense::DFA::new("z[0-9]{3}")?;
1308	/// let haystack = "foobar z123 q123".as_bytes();
1309	/// // A scanner executes a prefilter while tracking some state that helps
1310	/// // determine whether a prefilter is still "effective" or not.
1311	/// let mut scanner = Scanner::new(&ZPrefilter);
1312	///
1313	/// let expected = Some(HalfMatch::must(`0`, `11`));
1314	/// let got = dfa.find_earliest_fwd_at(
1315	/// Some(&mut scanner),
1316	/// None,
1317	/// haystack,
1318	/// `0`,
1319	/// haystack.len(),
1320	/// )?;
1321	/// assert_eq!(expected, got);
1322	///
1323	/// # Ok::<(), Box<dyn std::error::Error>>(())
1324	/// ```
1325	///
1326	/// # Example: specific pattern search
1327	///
1328	/// This example shows how to build a multi-DFA that permits searching for
1329	/// specific patterns.
1330	///
1331	/// ```
1332	/// use regex_automata::{
1333	/// dfa::{Automaton, dense},
1334	/// HalfMatch,
1335	/// PatternID,
1336	/// };
1337	///
1338	/// let dfa = dense::Builder::new()
1339	/// .configure(dense::Config::new().starts_for_each_pattern(`true`))
1340	/// .build_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
1341	/// let haystack = "foo123".as_bytes();
1342	///
1343	/// // Since we are using the default leftmost-first match and both
1344	/// // patterns match at the same starting position, only the first pattern
1345	/// // will be returned in this case when doing a search for any of the
1346	/// // patterns.
1347	/// let expected = Some(HalfMatch::must(`0`, `6`));
1348	/// let got = dfa.find_earliest_fwd_at(
1349	/// None,
1350	/// None,
1351	/// haystack,
1352	/// `0`,
1353	/// haystack.len(),
1354	/// )?;
1355	/// assert_eq!(expected, got);
1356	///
1357	/// // But if we want to check whether some other pattern matches, then we
1358	/// // can provide its pattern ID.
1359	/// let expected = Some(HalfMatch::must(`1`, `6`));
1360	/// let got = dfa.find_earliest_fwd_at(
1361	/// None,
1362	/// Some(PatternID::must(`1`)),
1363	/// haystack,
1364	/// `0`,
1365	/// haystack.len(),
1366	/// )?;
1367	/// assert_eq!(expected, got);
1368	///
1369	/// # Ok::<(), Box<dyn std::error::Error>>(())
1370	/// ```
1371	///
1372	/// # Example: specifying the bounds of a search
1373	///
1374	/// This example shows how providing the bounds of a search can produce
1375	/// different results than simply sub-slicing the haystack.
1376	///
1377	/// ```
1378	/// use regex_automata::{
1379	/// dfa::{Automaton, dense},
1380	/// HalfMatch,
1381	/// };
1382	///
1383	/// // N.B. We disable Unicode here so that we use a simple ASCII word
1384	/// // boundary. Alternatively, we could enable heuristic support for
1385	/// // Unicode word boundaries.
1386	/// let dfa = dense::DFA::new(r"(?-u)\b[0-9]{3}\b")?;
1387	/// let haystack = "foo123bar".as_bytes();
1388	///
1389	/// // Since we sub-slice the haystack, the search doesn't know about the
1390	/// // larger context and assumes that `123` is surrounded by word
1391	/// // boundaries. And of course, the match position is reported relative
1392	/// // to the sub-slice as well, which means we get `3` instead of `6`.
1393	/// let expected = Some(HalfMatch::must(`0`, `3`));
1394	/// let got = dfa.find_earliest_fwd_at(
1395	/// None,
1396	/// None,
1397	/// &haystack[`3`..`6`],
1398	/// `0`,
1399	/// haystack[`3`..`6`].len(),
1400	/// )?;
1401	/// assert_eq!(expected, got);
1402	///
1403	/// // But if we provide the bounds of the search within the context of the
1404	/// // entire haystack, then the search can take the surrounding context
1405	/// // into account. (And if we did find a match, it would be reported
1406	/// // as a valid offset into `haystack` instead of its sub-slice.)
1407	/// let expected = None;
1408	/// let got = dfa.find_earliest_fwd_at(
1409	/// None,
1410	/// None,
1411	/// haystack,
1412	/// `3`,
1413	/// `6`,
1414	/// )?;
1415	/// assert_eq!(expected, got);
1416	///
1417	/// # Ok::<(), Box<dyn std::error::Error>>(())
1418	/// ```
1419	#[inline]
1420	fn find_earliest_fwd_at(
1421	&self,
1422	pre: Option<&mut prefilter::Scanner>,
1423	pattern_id: Option<PatternID>,
1424	bytes: &[u8],
1425	start: usize,
1426	end: usize,
1427	) -> Result<Option<HalfMatch>, MatchError> {
1428	search::find_earliest_fwd(pre, self, pattern_id, bytes, start, end)
1429	}
1430
1431	/// Executes a reverse search and returns the start position of the first
1432	/// match that is found as early as possible. If no match exists, then
1433	/// `None` is returned.
1434	///
1435	/// This routine stops scanning input as soon as the search observes a
1436	/// match state.
1437	///
1438	/// This is like [`Automaton::find_earliest_rev`], except it provides some
1439	/// additional control over how the search is executed. See the
1440	/// documentation of [`Automaton::find_earliest_fwd_at`] for more details
1441	/// on the additional parameters along with examples of their usage.
1442	///
1443	/// # Errors
1444	///
1445	/// This routine only errors if the search could not complete. For
1446	/// DFAs generated by this crate, this only occurs in a non-default
1447	/// configuration where quit bytes are used or Unicode word boundaries are
1448	/// heuristically enabled.
1449	///
1450	/// When a search cannot complete, callers cannot know whether a match
1451	/// exists or not.
1452	///
1453	/// # Panics
1454	///
1455	/// This routine must panic if a `pattern_id` is given and the underlying
1456	/// DFA does not support specific pattern searches.
1457	///
1458	/// It must also panic if the given haystack range is not valid.
1459	#[inline]
1460	fn find_earliest_rev_at(
1461	&self,
1462	pattern_id: Option<PatternID>,
1463	bytes: &[u8],
1464	start: usize,
1465	end: usize,
1466	) -> Result<Option<HalfMatch>, MatchError> {
1467	search::find_earliest_rev(self, pattern_id, bytes, start, end)
1468	}
1469
1470	/// Executes a forward search and returns the end position of the leftmost
1471	/// match that is found. If no match exists, then `None` is returned.
1472	///
1473	/// This is like [`Automaton::find_leftmost_fwd`], except it provides some
1474	/// additional control over how the search is executed. See the
1475	/// documentation of [`Automaton::find_earliest_fwd_at`] for more details
1476	/// on the additional parameters along with examples of their usage.
1477	///
1478	/// # Errors
1479	///
1480	/// This routine only errors if the search could not complete. For
1481	/// DFAs generated by this crate, this only occurs in a non-default
1482	/// configuration where quit bytes are used or Unicode word boundaries are
1483	/// heuristically enabled.
1484	///
1485	/// When a search cannot complete, callers cannot know whether a match
1486	/// exists or not.
1487	///
1488	/// # Panics
1489	///
1490	/// This routine must panic if a `pattern_id` is given and the underlying
1491	/// DFA does not support specific pattern searches.
1492	///
1493	/// It must also panic if the given haystack range is not valid.
1494	#[inline]
1495	fn find_leftmost_fwd_at(
1496	&self,
1497	pre: Option<&mut prefilter::Scanner>,
1498	pattern_id: Option<PatternID>,
1499	bytes: &[u8],
1500	start: usize,
1501	end: usize,
1502	) -> Result<Option<HalfMatch>, MatchError> {
1503	search::find_leftmost_fwd(pre, self, pattern_id, bytes, start, end)
1504	}
1505
1506	/// Executes a reverse search and returns the start of the position of the
1507	/// leftmost match that is found. If no match exists, then `None` is
1508	/// returned.
1509	///
1510	/// This is like [`Automaton::find_leftmost_rev`], except it provides some
1511	/// additional control over how the search is executed. See the
1512	/// documentation of [`Automaton::find_earliest_fwd_at`] for more details
1513	/// on the additional parameters along with examples of their usage.
1514	///
1515	/// # Errors
1516	///
1517	/// This routine only errors if the search could not complete. For
1518	/// DFAs generated by this crate, this only occurs in a non-default
1519	/// configuration where quit bytes are used or Unicode word boundaries are
1520	/// heuristically enabled.
1521	///
1522	/// When a search cannot complete, callers cannot know whether a match
1523	/// exists or not.
1524	///
1525	/// # Panics
1526	///
1527	/// This routine must panic if a `pattern_id` is given and the underlying
1528	/// DFA does not support specific pattern searches.
1529	///
1530	/// It must also panic if the given haystack range is not valid.
1531	#[inline]
1532	fn find_leftmost_rev_at(
1533	&self,
1534	pattern_id: Option<PatternID>,
1535	bytes: &[u8],
1536	start: usize,
1537	end: usize,
1538	) -> Result<Option<HalfMatch>, MatchError> {
1539	search::find_leftmost_rev(self, pattern_id, bytes, start, end)
1540	}
1541
1542	/// Executes an overlapping forward search and returns the end position of
1543	/// matches as they are found. If no match exists, then `None` is returned.
1544	///
1545	/// This routine is principally only useful when searching for multiple
1546	/// patterns on inputs where multiple patterns may match the same regions
1547	/// of text. In particular, callers must preserve the automaton's search
1548	/// state from prior calls so that the implementation knows where the last
1549	/// match occurred.
1550	///
1551	/// This is like [`Automaton::find_overlapping_fwd`], except it provides
1552	/// some additional control over how the search is executed. See the
1553	/// documentation of [`Automaton::find_earliest_fwd_at`] for more details
1554	/// on the additional parameters along with examples of their usage.
1555	///
1556	/// When using this routine to implement an iterator of overlapping
1557	/// matches, the `start` of the search should always be set to the end
1558	/// of the last match. If more patterns match at the previous location,
1559	/// then they will be immediately returned. (This is tracked by the given
1560	/// overlapping state.) Otherwise, the search continues at the starting
1561	/// position given.
1562	///
1563	/// If for some reason you want the search to forget about its previous
1564	/// state and restart the search at a particular position, then setting the
1565	/// state to [`OverlappingState::start`] will accomplish that.
1566	///
1567	/// # Errors
1568	///
1569	/// This routine only errors if the search could not complete. For
1570	/// DFAs generated by this crate, this only occurs in a non-default
1571	/// configuration where quit bytes are used or Unicode word boundaries are
1572	/// heuristically enabled.
1573	///
1574	/// When a search cannot complete, callers cannot know whether a match
1575	/// exists or not.
1576	///
1577	/// # Panics
1578	///
1579	/// This routine must panic if a `pattern_id` is given and the underlying
1580	/// DFA does not support specific pattern searches.
1581	///
1582	/// It must also panic if the given haystack range is not valid.
1583	#[inline]
1584	fn find_overlapping_fwd_at(
1585	&self,
1586	pre: Option<&mut prefilter::Scanner>,
1587	pattern_id: Option<PatternID>,
1588	bytes: &[u8],
1589	start: usize,
1590	end: usize,
1591	state: &mut OverlappingState,
1592	) -> Result<Option<HalfMatch>, MatchError> {
1593	search::find_overlapping_fwd(
1594	pre, self, pattern_id, bytes, start, end, state,
1595	)
1596	}
1597	}
1598
1599	unsafe impl<'a, T: Automaton> Automaton for &'a T {
1600	#[inline]
1601	fn next_state(&self, current: StateID, input: u8) -> StateID {
1602	(**self).next_state(current, input)
1603	}
1604
1605	#[inline]
1606	unsafe fn next_state_unchecked(
1607	&self,
1608	current: StateID,
1609	input: u8,
1610	) -> StateID {
1611	(**self).next_state_unchecked(current, input)
1612	}
1613
1614	#[inline]
1615	fn next_eoi_state(&self, current: StateID) -> StateID {
1616	(**self).next_eoi_state(current)
1617	}
1618
1619	#[inline]
1620	fn start_state_forward(
1621	&self,
1622	pattern_id: Option<PatternID>,
1623	bytes: &[u8],
1624	start: usize,
1625	end: usize,
1626	) -> StateID {
1627	(**self).start_state_forward(pattern_id, bytes, start, end)
1628	}
1629
1630	#[inline]
1631	fn start_state_reverse(
1632	&self,
1633	pattern_id: Option<PatternID>,
1634	bytes: &[u8],
1635	start: usize,
1636	end: usize,
1637	) -> StateID {
1638	(**self).start_state_reverse(pattern_id, bytes, start, end)
1639	}
1640
1641	#[inline]
1642	fn is_special_state(&self, id: StateID) -> bool {
1643	(**self).is_special_state(id)
1644	}
1645
1646	#[inline]
1647	fn is_dead_state(&self, id: StateID) -> bool {
1648	(**self).is_dead_state(id)
1649	}
1650
1651	#[inline]
1652	fn is_quit_state(&self, id: StateID) -> bool {
1653	(**self).is_quit_state(id)
1654	}
1655
1656	#[inline]
1657	fn is_match_state(&self, id: StateID) -> bool {
1658	(**self).is_match_state(id)
1659	}
1660
1661	#[inline]
1662	fn is_start_state(&self, id: StateID) -> bool {
1663	(**self).is_start_state(id)
1664	}
1665
1666	#[inline]
1667	fn is_accel_state(&self, id: StateID) -> bool {
1668	(**self).is_accel_state(id)
1669	}
1670
1671	#[inline]
1672	fn pattern_count(&self) -> usize {
1673	(**self).pattern_count()
1674	}
1675
1676	#[inline]
1677	fn match_count(&self, id: StateID) -> usize {
1678	(**self).match_count(id)
1679	}
1680
1681	#[inline]
1682	fn match_pattern(&self, id: StateID, index: usize) -> PatternID {
1683	(**self).match_pattern(id, index)
1684	}
1685
1686	#[inline]
1687	fn accelerator(&self, id: StateID) -> &[u8] {
1688	(**self).accelerator(id)
1689	}
1690
1691	#[inline]
1692	fn find_earliest_fwd(
1693	&self,
1694	bytes: &[u8],
1695	) -> Result<Option<HalfMatch>, MatchError> {
1696	(**self).find_earliest_fwd(bytes)
1697	}
1698
1699	#[inline]
1700	fn find_earliest_rev(
1701	&self,
1702	bytes: &[u8],
1703	) -> Result<Option<HalfMatch>, MatchError> {
1704	(**self).find_earliest_rev(bytes)
1705	}
1706
1707	#[inline]
1708	fn find_leftmost_fwd(
1709	&self,
1710	bytes: &[u8],
1711	) -> Result<Option<HalfMatch>, MatchError> {
1712	(**self).find_leftmost_fwd(bytes)
1713	}
1714
1715	#[inline]
1716	fn find_leftmost_rev(
1717	&self,
1718	bytes: &[u8],
1719	) -> Result<Option<HalfMatch>, MatchError> {
1720	(**self).find_leftmost_rev(bytes)
1721	}
1722
1723	#[inline]
1724	fn find_overlapping_fwd(
1725	&self,
1726	bytes: &[u8],
1727	state: &mut OverlappingState,
1728	) -> Result<Option<HalfMatch>, MatchError> {
1729	(**self).find_overlapping_fwd(bytes, state)
1730	}
1731
1732	#[inline]
1733	fn find_earliest_fwd_at(
1734	&self,
1735	pre: Option<&mut prefilter::Scanner>,
1736	pattern_id: Option<PatternID>,
1737	bytes: &[u8],
1738	start: usize,
1739	end: usize,
1740	) -> Result<Option<HalfMatch>, MatchError> {
1741	(**self).find_earliest_fwd_at(pre, pattern_id, bytes, start, end)
1742	}
1743
1744	#[inline]
1745	fn find_earliest_rev_at(
1746	&self,
1747	pattern_id: Option<PatternID>,
1748	bytes: &[u8],
1749	start: usize,
1750	end: usize,
1751	) -> Result<Option<HalfMatch>, MatchError> {
1752	(**self).find_earliest_rev_at(pattern_id, bytes, start, end)
1753	}
1754
1755	#[inline]
1756	fn find_leftmost_fwd_at(
1757	&self,
1758	pre: Option<&mut prefilter::Scanner>,
1759	pattern_id: Option<PatternID>,
1760	bytes: &[u8],
1761	start: usize,
1762	end: usize,
1763	) -> Result<Option<HalfMatch>, MatchError> {
1764	(**self).find_leftmost_fwd_at(pre, pattern_id, bytes, start, end)
1765	}
1766
1767	#[inline]
1768	fn find_leftmost_rev_at(
1769	&self,
1770	pattern_id: Option<PatternID>,
1771	bytes: &[u8],
1772	start: usize,
1773	end: usize,
1774	) -> Result<Option<HalfMatch>, MatchError> {
1775	(**self).find_leftmost_rev_at(pattern_id, bytes, start, end)
1776	}
1777
1778	#[inline]
1779	fn find_overlapping_fwd_at(
1780	&self,
1781	pre: Option<&mut prefilter::Scanner>,
1782	pattern_id: Option<PatternID>,
1783	bytes: &[u8],
1784	start: usize,
1785	end: usize,
1786	state: &mut OverlappingState,
1787	) -> Result<Option<HalfMatch>, MatchError> {
1788	(**self)
1789	.find_overlapping_fwd_at(pre, pattern_id, bytes, start, end, state)
1790	}
1791	}
1792
1793	/// Represents the current state of an overlapping search.
1794	///
1795	/// This is used for overlapping searches since they need to know something
1796	/// about the previous search. For example, when multiple patterns match at the
1797	/// same position, this state tracks the last reported pattern so that the next
1798	/// search knows whether to report another matching pattern or continue with
1799	/// the search at the next position. Additionally, it also tracks which state
1800	/// the last search call terminated in.
1801	///
1802	/// This type provides no introspection capabilities. The only thing a caller
1803	/// can do is construct it and pass it around to permit search routines to use
1804	/// it to track state.
1805	///
1806	/// Callers should always provide a fresh state constructed via
1807	/// [`OverlappingState::start`] when starting a new search. Reusing state from
1808	/// a previous search may result in incorrect results.
1809	#[derive(Clone, Debug, Eq, PartialEq)]
1810	pub struct OverlappingState {
1811	/// The state ID of the state at which the search was in when the call
1812	/// terminated. When this is a match state, `last_match` must be set to a
1813	/// non-None value.
1814	///
1815	/// A `None` value indicates the start state of the corresponding
1816	/// automaton. We cannot use the actual ID, since any one automaton may
1817	/// have many start states, and which one is in use depends on several
1818	/// search-time factors.
1819	id: Option<StateID>,
1820	/// Information associated with a match when `id` corresponds to a match
1821	/// state.
1822	last_match: Option<StateMatch>,
1823	}
1824
1825	/// Internal state about the last match that occurred. This records both the
1826	/// offset of the match and the match index.
1827	#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1828	pub(crate) struct StateMatch {
1829	/// The index into the matching patterns for the current match state.
1830	pub(crate) match_index: usize,
1831	/// The offset in the haystack at which the match occurred. This is used
1832	/// when reporting multiple matches at the same offset. That is, when
1833	/// an overlapping search runs, the first thing it checks is whether it's
1834	/// already in a match state, and if so, whether there are more patterns
1835	/// to report as matches in that state. If so, it increments `match_index`
1836	/// and returns the pattern and this offset. Once `match_index` exceeds the
1837	/// number of matching patterns in the current state, the search continues.
1838	pub(crate) offset: usize,
1839	}
1840
1841	impl OverlappingState {
1842	/// Create a new overlapping state that begins at the start state of any
1843	/// automaton.
1844	pub fn start() -> OverlappingState {
1845	OverlappingState { id: None, last_match: None }
1846	}
1847
1848	pub(crate) fn id(&self) -> Option<StateID> {
1849	self.id
1850	}
1851
1852	pub(crate) fn set_id(&mut self, id: StateID) {
1853	self.id = Some(id);
1854	}
1855
1856	pub(crate) fn last_match(&mut self) -> Option<&mut StateMatch> {
1857	self.last_match.as_mut()
1858	}
1859
1860	pub(crate) fn set_last_match(&mut self, last_match: StateMatch) {
1861	self.last_match = Some(last_match);
1862	}
1863	}
1864
1865	/// Write a prefix "state" indicator for fmt::Debug impls.
1866	///
1867	/// Specifically, this tries to succinctly distinguish the different types of
1868	/// states: dead states, quit states, accelerated states, start states and
1869	/// match states. It even accounts for the possible overlappings of different
1870	/// state types.
1871	pub(crate) fn fmt_state_indicator<A: Automaton>(
1872	f: &mut core::fmt::Formatter<'_>,
1873	dfa: A,
1874	id: StateID,
1875	) -> core::fmt::Result {
1876	if dfa.is_dead_state(id) {
1877	write!(f, "D")?;
1878	if dfa.is_start_state(id) {
1879	write!(f, ">")?;
1880	} else {
1881	write!(f, " ")?;
1882	}
1883	} else if dfa.is_quit_state(id) {
1884	write!(f, "Q ")?;
1885	} else if dfa.is_start_state(id) {
1886	if dfa.is_accel_state(id) {
1887	write!(f, "A>")?;
1888	} else {
1889	write!(f, " >")?;
1890	}
1891	} else if dfa.is_match_state(id) {
1892	if dfa.is_accel_state(id) {
1893	write!(f, "A*")?;
1894	} else {
1895	write!(f, " *")?;
1896	}
1897	} else if dfa.is_accel_state(id) {
1898	write!(f, "A ")?;
1899	} else {
1900	write!(f, " ")?;
1901	}
1902	Ok(())
1903	}
1904