use crate::{
    hybrid::{
        dfa::{Cache, OverlappingState, DFA},
        id::LazyStateID,
    },
    util::{
        prefilter::Prefilter,
        search::{HalfMatch, Input, MatchError, Span},
    },
};

#[inline(never)]
pub(crate) fn find_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<Option<HalfMatch>, MatchError> {
    if input.is_done() {
        return Ok(None);
    }
    let pre = if input.get_anchored().is_anchored() {
        None
    } else {
        dfa.get_config().get_prefilter()
    };
    // So what we do here is specialize four different versions of 'find_fwd':
    // one for each combination of 'has prefilter' and 'is earliest search'.
    // The reason for doing this is that both of these things require branches
    // and special handling in some code that can be very hot, and shaving off
    // as much as we can when we don't need it tends to be beneficial in ad
    // hoc benchmarks. To see these differences, you often need a query with a
    // high match count. In other words, specializing these four routines
    // *tends* to help latency more than throughput.
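    //
    // As a rough sketch of why passing literals helps (assuming the
    // 'inline(always)' on 'find_fwd_imp' actually takes effect): each call
    // below passes 'earliest' as a literal, so after inlining, constant
    // propagation can reduce the hot-path check
    //
    //     if earliest {
    //         cache.search_finish(at);
    //         return Ok(mat);
    //     }
    //
    // to either an unconditional early return or to nothing at all, and
    // likewise prune the 'if let Some(ref pre) = pre' branches when 'None'
    // is passed, removing branches from code that runs on every match.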
    if pre.is_some() {
        if input.get_earliest() {
            find_fwd_imp(dfa, cache, input, pre, true)
        } else {
            find_fwd_imp(dfa, cache, input, pre, false)
        }
    } else {
        if input.get_earliest() {
            find_fwd_imp(dfa, cache, input, None, true)
        } else {
            find_fwd_imp(dfa, cache, input, None, false)
        }
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_fwd_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    pre: Option<&'_ Prefilter>,
    earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
    // See 'prefilter_restart' docs for explanation.
    let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
    let mut mat = None;
    let mut sid = init_fwd(dfa, cache, input)?;
    let mut at = input.start();
    // This could just be a closure, but then I think it would be unsound
    // because it would need to be safe to invoke. This way, the lack of
    // safety is clearer in the code below.
    macro_rules! next_unchecked {
        ($sid:expr, $at:expr) => {{
            let byte = *input.haystack().get_unchecked($at);
            dfa.next_state_untagged_unchecked(cache, $sid, byte)
        }};
    }

    if let Some(ref pre) = pre {
        let span = Span::from(at..input.end());
        match pre.find(input.haystack(), span) {
            None => return Ok(mat),
            Some(ref span) => {
                at = span.start;
                if !universal_start {
                    sid = prefilter_restart(dfa, cache, &input, at)?;
                }
            }
        }
    }
    cache.search_start(at);
    while at < input.end() {
        if sid.is_tagged() {
            cache.search_update(at);
            sid = dfa
                .next_state(cache, sid, input.haystack()[at])
                .map_err(|_| gave_up(at))?;
        } else {
            // SAFETY: There are two safety invariants we need to uphold
            // here in the loops below: that 'sid' and 'prev_sid' are valid
            // state IDs for this DFA, and that 'at' is a valid index into
            // 'haystack'. For the former, we rely on the invariant that
            // next_state* and start_state_forward always return a valid state
            // ID (given a valid state ID in the former case), and that we are
            // only at this place in the code if 'sid' is untagged. Moreover,
            // every call to next_state_untagged_unchecked below is guarded by
            // a check that sid is untagged. For the latter safety invariant,
            // we always guard unchecked access with a check that 'at' is less
            // than 'end', where 'end <= haystack.len()'. In the unrolled loop
            // below, we ensure that 'at' is always in bounds.
            //
            // PERF: As justification for omitting bounds checks: doing so
            // gives us a ~10% improvement in search time. The benchmark used
            // was:
            //
            //     regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
            //
            // PERF: As justification for the loop unrolling, we use a few
            // different tests:
            //
            //     regex-cli find half hybrid -p '\w{50}' -UBb bigfile
            //     regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
            //     regex-cli find half hybrid -p 'ZQZQZQZQ' -UBb bigfile
            //
            // And there are four different configurations:
            //
            //   nounroll: this entire 'else' block vanishes and we just
            //             always use 'dfa.next_state(..)'.
            //   unroll1:  just the outer loop below
            //   unroll2:  just the inner loop below
            //   unroll3:  both the outer and inner loops below
            //
            // This results in a matrix of timings for each of the above
            // regexes with each of the above unrolling configurations:
            //
            //              '\w{50}'   '(?m)^.+$'   'ZQZQZQZQ'
            //   nounroll    1.51s      2.34s        1.51s
            //   unroll1     1.53s      2.32s        1.56s
            //   unroll2     2.22s      1.50s        0.61s
            //   unroll3     1.67s      1.45s        0.61s
            //
            // Ideally we'd be able to find a configuration that yields the
            // best time for all regexes, but alas we settle for unroll3, which
            // gives us *almost* the best for '\w{50}' and the best for the
            // other two regexes.
            //
            // So what exactly is going on here? The first unrolling (grouping
            // together runs of untagged transitions) specifically targets
            // our choice of representation. The second unrolling (grouping
            // together runs of self-transitions) specifically targets a
            // common DFA topology. Let's dig in a little bit by looking at
            // our regexes:
            //
            // '\w{50}': This regex spends a lot of time outside of the DFA's
            // start state matching some part of the '\w' repetition. This
            // means that it's a bit of a worst case for loop unrolling that
            // targets self-transitions since the self-transitions in '\w{50}'
            // are not particularly active for this haystack. However, the
            // first unrolling (grouping together untagged transitions)
            // does apply quite well here since very few transitions hit
            // match/dead/quit/unknown states. It is however worth mentioning
            // that if start states are configured to be tagged (which you
            // typically want to do if you have a prefilter), then this regex
            // actually slows way down because it is constantly ping-ponging
            // out of the unrolled loop and into the handling of a tagged
            // start state below. But when start states aren't tagged, the
            // unrolled loop stays hot. (This is why it's imperative that
            // start state tagging be disabled when there isn't a prefilter!)
            //
            // '(?m)^.+$': There are two important aspects of this regex: 1)
            // on this haystack, its match count is very high, much higher
            // than the other two regexes, and 2) it spends the vast majority
            // of its time matching '.+'. Since Unicode mode is disabled,
            // this corresponds to repeatedly following self transitions for
            // the vast majority of the input. This does benefit from the
            // untagged unrolling since most of the transitions will be to
            // untagged states, but the untagged unrolling does more work than
            // what is actually required. Namely, it has to keep track of the
            // previous and next state IDs, which I guess requires a bit more
            // shuffling. This is supported by the fact that nounroll+unroll1
            // are both slower than unroll2+unroll3, where the latter has a
            // loop unrolling that specifically targets self-transitions.
            //
            // 'ZQZQZQZQ': This one is very similar to '(?m)^.+$' because it
            // spends the vast majority of its time in self-transitions for
            // the (implicit) unanchored prefix. The main difference with
            // '(?m)^.+$' is that it has a much lower match count. So there
            // isn't much time spent in the overhead of reporting matches.
            // This is the primary explainer of the perf difference here. We
            // include this regex and the former to make sure we have
            // comparison points with high and low match counts.
            //
            // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'.
            //
            // NOTE: In a follow-up, it turns out that the "inner" loop
            // mentioned above was a pretty big pessimization in some other
            // cases. Namely, it resulted in too much ping-ponging into and
            // out of the loop, which resulted in nearly 2x regressions in
            // search time when compared to the original lazy DFA in the
            // regex crate. So I've removed the second loop unrolling that
            // targets the self-transition case.
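            //
            // For reference, a self-transition unrolling of the kind
            // described above would look roughly like the following (an
            // illustrative sketch only, not the code that was removed):
            //
            //     while at < input.end()
            //         && sid == unsafe { next_unchecked!(sid, at) }
            //     {
            //         at += 1;
            //     }
            //
            // That is, it burns through runs of bytes that keep the DFA in
            // the same (untagged) state before falling back to the general
            // transition handling below.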
            let mut prev_sid = sid;
            while at < input.end() {
                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() || at + 3 >= input.end() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at += 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at += 1;

                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at += 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at += 1;
            }
            // If we quit out of the code above with an unknown state ID at
            // any point, then we need to re-compute that transition using
            // 'next_state', which will do NFA powerset construction for us.
            if sid.is_unknown() {
                cache.search_update(at);
                sid = dfa
                    .next_state(cache, prev_sid, input.haystack()[at])
                    .map_err(|_| gave_up(at))?;
            }
        }
        if sid.is_tagged() {
            if sid.is_start() {
                if let Some(ref pre) = pre {
                    let span = Span::from(at..input.end());
                    match pre.find(input.haystack(), span) {
                        None => {
                            cache.search_finish(span.end);
                            return Ok(mat);
                        }
                        Some(ref span) => {
                            // We want to skip any update to 'at' below
                            // at the end of this iteration and just
                            // jump immediately back to the next state
                            // transition at the leading position of the
                            // candidate match.
                            //
                            // ... but only if we actually made progress
                            // with our prefilter, otherwise if the start
                            // state has a self-loop, we can get stuck.
                            if span.start > at {
                                at = span.start;
                                if !universal_start {
                                    sid = prefilter_restart(
                                        dfa, cache, &input, at,
                                    )?;
                                }
                                continue;
                            }
                        }
                    }
                }
            } else if sid.is_match() {
                let pattern = dfa.match_pattern(cache, sid, 0);
                // Since slice ranges are inclusive at the beginning and
                // exclusive at the end, and since forward searches report
                // the end, we can return 'at' as-is. This only works because
                // matches are delayed by 1 byte. So by the time we observe a
                // match, 'at' has already been set to 1 byte past the actual
                // match location, which is precisely the exclusive ending
                // bound of the match.
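                //
                // As a concrete illustration (with a hypothetical pattern
                // and haystack): for the pattern 'abc' and the haystack
                // "zabcz", the match span is 1..4. Because of the 1 byte
                // delay, the DFA enters its match state on the transition
                // for the byte at offset 4 (the trailing 'z'), at which
                // point 'at == 4', which is exactly the exclusive end offset
                // reported below.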
                mat = Some(HalfMatch::new(pattern, at));
                if earliest {
                    cache.search_finish(at);
                    return Ok(mat);
                }
            } else if sid.is_dead() {
                cache.search_finish(at);
                return Ok(mat);
            } else if sid.is_quit() {
                cache.search_finish(at);
                return Err(MatchError::quit(input.haystack()[at], at));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        at += 1;
    }
    eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?;
    cache.search_finish(input.end());
    Ok(mat)
}

#[inline(never)]
pub(crate) fn find_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<Option<HalfMatch>, MatchError> {
    if input.is_done() {
        return Ok(None);
    }
    if input.get_earliest() {
        find_rev_imp(dfa, cache, input, true)
    } else {
        find_rev_imp(dfa, cache, input, false)
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_rev_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
    let mut mat = None;
    let mut sid = init_rev(dfa, cache, input)?;
    // In reverse search, the loop below can't handle the case of searching an
    // empty slice. Ideally we could write something congruent to the forward
    // search, i.e., 'while at >= start', but 'start' might be 0. Since we use
    // an unsigned offset, 'at >= 0' is trivially always true. We could avoid
    // this extra case handling by using a signed offset, but Rust makes it
    // annoying to do. So... We just handle the empty case separately.
    if input.start() == input.end() {
        eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
        return Ok(mat);
    }

    let mut at = input.end() - 1;
    macro_rules! next_unchecked {
        ($sid:expr, $at:expr) => {{
            let byte = *input.haystack().get_unchecked($at);
            dfa.next_state_untagged_unchecked(cache, $sid, byte)
        }};
    }
    cache.search_start(at);
    loop {
        if sid.is_tagged() {
            cache.search_update(at);
            sid = dfa
                .next_state(cache, sid, input.haystack()[at])
                .map_err(|_| gave_up(at))?;
        } else {
            // SAFETY: See comments in 'find_fwd' for a safety argument.
            //
            // PERF: The comments in 'find_fwd' also provide a justification
            // from a performance perspective as to 1) why we elide bounds
            // checks and 2) why we do a specialized version of unrolling
            // below. The reverse search does have a slightly different
            // consideration in that most reverse searches tend to be
            // anchored and on shorter haystacks. However, this still makes a
            // difference. Take this command for example:
            //
            //     regex-cli find match hybrid -p '(?m)^.+$' -UBb bigfile
            //
            // (Notice that we use 'find match hybrid', not 'find half
            // hybrid' like in the justification for the forward direction.
            // The 'match' sub-command will find start-of-match and thus run
            // the reverse direction.)
            //
            // Without unrolling below, the above command takes around 3.76s.
            // But with the unrolling below, we get down to 2.55s. If we keep
            // the unrolling but add in bounds checks, then we get 2.86s.
            //
            // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'.
            let mut prev_sid = sid;
            while at >= input.start() {
                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged()
                    || at <= input.start().saturating_add(3)
                {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at -= 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at -= 1;

                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at -= 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at -= 1;
            }
            // If we quit out of the code above with an unknown state ID at
            // any point, then we need to re-compute that transition using
            // 'next_state', which will do NFA powerset construction for us.
            if sid.is_unknown() {
                cache.search_update(at);
                sid = dfa
                    .next_state(cache, prev_sid, input.haystack()[at])
                    .map_err(|_| gave_up(at))?;
            }
        }
        if sid.is_tagged() {
            if sid.is_start() {
                // do nothing
            } else if sid.is_match() {
                let pattern = dfa.match_pattern(cache, sid, 0);
                // Since matches are delayed by 1 byte in the reverse
                // direction as well, 'at' points at the byte immediately
                // before the true start of the match. Reverse searches
                // report the start of a match, which is an inclusive bound,
                // so 'at + 1' is exactly the offset we want.
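                //
                // As a concrete illustration (with a hypothetical pattern
                // and haystack): for the pattern 'abc' and the haystack
                // "zabcz", the match starts at offset 1. The reverse DFA
                // enters its match state on the transition for the byte at
                // offset 0 (the leading 'z'), so 'at == 0' here and
                // 'at + 1 == 1' is the inclusive start offset reported
                // below.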
                mat = Some(HalfMatch::new(pattern, at + 1));
                if earliest {
                    cache.search_finish(at);
                    return Ok(mat);
                }
            } else if sid.is_dead() {
                cache.search_finish(at);
                return Ok(mat);
            } else if sid.is_quit() {
                cache.search_finish(at);
                return Err(MatchError::quit(input.haystack()[at], at));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        if at == input.start() {
            break;
        }
        at -= 1;
    }
    cache.search_finish(input.start());
    eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
    Ok(mat)
}

#[inline(never)]
pub(crate) fn find_overlapping_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    state.mat = None;
    if input.is_done() {
        return Ok(());
    }
    let pre = if input.get_anchored().is_anchored() {
        None
    } else {
        dfa.get_config().get_prefilter()
    };
    if pre.is_some() {
        find_overlapping_fwd_imp(dfa, cache, input, pre, state)
    } else {
        find_overlapping_fwd_imp(dfa, cache, input, None, state)
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_overlapping_fwd_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    pre: Option<&'_ Prefilter>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    // See 'prefilter_restart' docs for explanation.
    let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
    let mut sid = match state.id {
        None => {
            state.at = input.start();
            init_fwd(dfa, cache, input)?
        }
        Some(sid) => {
            if let Some(match_index) = state.next_match_index {
                let match_len = dfa.match_len(cache, sid);
                if match_index < match_len {
                    state.next_match_index = Some(match_index + 1);
                    let pattern = dfa.match_pattern(cache, sid, match_index);
                    state.mat = Some(HalfMatch::new(pattern, state.at));
                    return Ok(());
                }
            }
            // Once we've reported all matches at a given position, we need to
            // advance the search to the next position.
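            //
            // For example (hypothetical patterns and haystack): with the
            // patterns 'sam' and 'am' and the haystack "samwise", both
            // patterns can have a match ending at offset 3, so the match
            // state reached there can report a match length of 2. The first
            // call reports one of those matches at offset 3, the resumed
            // call above reports the other at the same offset, and only then
            // do we fall through to here and advance 'state.at'.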
            state.at += 1;
            if state.at > input.end() {
                return Ok(());
            }
            sid
        }
    };

    // NOTE: We don't optimize the crap out of this routine primarily because
    // it seems like most overlapping searches will have higher match counts,
    // and thus, throughput is perhaps not as important. But if you have a use
    // case for something faster, feel free to file an issue.
    cache.search_start(state.at);
    while state.at < input.end() {
        sid = dfa
            .next_state(cache, sid, input.haystack()[state.at])
            .map_err(|_| gave_up(state.at))?;
        if sid.is_tagged() {
            state.id = Some(sid);
            if sid.is_start() {
                if let Some(ref pre) = pre {
                    let span = Span::from(state.at..input.end());
                    match pre.find(input.haystack(), span) {
                        None => return Ok(()),
                        Some(ref span) => {
                            if span.start > state.at {
                                state.at = span.start;
                                if !universal_start {
                                    sid = prefilter_restart(
                                        dfa, cache, &input, state.at,
                                    )?;
                                }
                                continue;
                            }
                        }
                    }
                }
            } else if sid.is_match() {
                state.next_match_index = Some(1);
                let pattern = dfa.match_pattern(cache, sid, 0);
                state.mat = Some(HalfMatch::new(pattern, state.at));
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_dead() {
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_quit() {
                cache.search_finish(state.at);
                return Err(MatchError::quit(
                    input.haystack()[state.at],
                    state.at,
                ));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        state.at += 1;
        cache.search_update(state.at);
    }

    let result = eoi_fwd(dfa, cache, input, &mut sid, &mut state.mat);
    state.id = Some(sid);
    if state.mat.is_some() {
        // '1' is always correct here since if we get to this point, this
        // always corresponds to the first (index '0') match discovered at
        // this position. So the next match to report at this position (if
        // it exists) is at index '1'.
        state.next_match_index = Some(1);
    }
    cache.search_finish(input.end());
    result
}

#[inline(never)]
pub(crate) fn find_overlapping_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    state.mat = None;
    if input.is_done() {
        return Ok(());
    }
    let mut sid = match state.id {
        None => {
            let sid = init_rev(dfa, cache, input)?;
            state.id = Some(sid);
            if input.start() == input.end() {
                state.rev_eoi = true;
            } else {
                state.at = input.end() - 1;
            }
            sid
        }
        Some(sid) => {
            if let Some(match_index) = state.next_match_index {
                let match_len = dfa.match_len(cache, sid);
                if match_index < match_len {
                    state.next_match_index = Some(match_index + 1);
                    let pattern = dfa.match_pattern(cache, sid, match_index);
                    state.mat = Some(HalfMatch::new(pattern, state.at));
                    return Ok(());
                }
            }
            // Once we've reported all matches at a given position, we need
            // to advance the search to the next position. However, if we've
            // already followed the EOI transition, then we know we're done
            // with the search and there cannot be any more matches to report.
            if state.rev_eoi {
                return Ok(());
            } else if state.at == input.start() {
                // At this point, we should follow the EOI transition. This
                // will cause us to skip the main loop below and fall through
                // to the final 'eoi_rev' transition.
                state.rev_eoi = true;
            } else {
                // We haven't hit the end of the search yet, so move on.
                state.at -= 1;
            }
            sid
        }
    };
    cache.search_start(state.at);
    while !state.rev_eoi {
        sid = dfa
            .next_state(cache, sid, input.haystack()[state.at])
            .map_err(|_| gave_up(state.at))?;
        if sid.is_tagged() {
            state.id = Some(sid);
            if sid.is_start() {
                // do nothing
            } else if sid.is_match() {
                state.next_match_index = Some(1);
                let pattern = dfa.match_pattern(cache, sid, 0);
                state.mat = Some(HalfMatch::new(pattern, state.at + 1));
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_dead() {
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_quit() {
                cache.search_finish(state.at);
                return Err(MatchError::quit(
                    input.haystack()[state.at],
                    state.at,
                ));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        if state.at == input.start() {
            break;
        }
        state.at -= 1;
        cache.search_update(state.at);
    }

    let result = eoi_rev(dfa, cache, input, &mut sid, &mut state.mat);
    state.rev_eoi = true;
    state.id = Some(sid);
    if state.mat.is_some() {
        // '1' is always correct here since if we get to this point, this
        // always corresponds to the first (index '0') match discovered at
        // this position. So the next match to report at this position (if
        // it exists) is at index '1'.
        state.next_match_index = Some(1);
    }
    cache.search_finish(input.start());
    result
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<LazyStateID, MatchError> {
    let sid = dfa.start_state_forward(cache, input)?;
    // Start states can never be match states, since all matches are delayed
    // by 1 byte.
    debug_assert!(!sid.is_match());
    Ok(sid)
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<LazyStateID, MatchError> {
    let sid = dfa.start_state_reverse(cache, input)?;
    // Start states can never be match states, since all matches are delayed
    // by 1 byte.
    debug_assert!(!sid.is_match());
    Ok(sid)
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    sid: &mut LazyStateID,
    mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
    let sp = input.get_span();
    match input.haystack().get(sp.end) {
        Some(&b) => {
            *sid =
                dfa.next_state(cache, *sid, b).map_err(|_| gave_up(sp.end))?;
            if sid.is_match() {
                let pattern = dfa.match_pattern(cache, *sid, 0);
                *mat = Some(HalfMatch::new(pattern, sp.end));
            } else if sid.is_quit() {
                return Err(MatchError::quit(b, sp.end));
            }
        }
        None => {
            *sid = dfa
                .next_eoi_state(cache, *sid)
                .map_err(|_| gave_up(input.haystack().len()))?;
            if sid.is_match() {
                let pattern = dfa.match_pattern(cache, *sid, 0);
                *mat = Some(HalfMatch::new(pattern, input.haystack().len()));
            }
            // N.B. We don't have to check 'is_quit' here because the EOI
            // transition can never lead to a quit state.
            debug_assert!(!sid.is_quit());
        }
    }
    Ok(())
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    sid: &mut LazyStateID,
    mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
    let sp = input.get_span();
    if sp.start > 0 {
        let byte = input.haystack()[sp.start - 1];
        *sid = dfa
            .next_state(cache, *sid, byte)
            .map_err(|_| gave_up(sp.start))?;
        if sid.is_match() {
            let pattern = dfa.match_pattern(cache, *sid, 0);
            *mat = Some(HalfMatch::new(pattern, sp.start));
        } else if sid.is_quit() {
            return Err(MatchError::quit(byte, sp.start - 1));
        }
    } else {
        *sid =
            dfa.next_eoi_state(cache, *sid).map_err(|_| gave_up(sp.start))?;
        if sid.is_match() {
            let pattern = dfa.match_pattern(cache, *sid, 0);
            *mat = Some(HalfMatch::new(pattern, 0));
        }
        // N.B. We don't have to check 'is_quit' here because the EOI
        // transition can never lead to a quit state.
        debug_assert!(!sid.is_quit());
    }
    Ok(())
}
/// Re-compute the starting state that a DFA should be in after finding a
/// prefilter candidate match at the position `at`.
///
/// It is always correct to call this, but not always necessary. Namely,
/// whenever the DFA has a universal start state, the DFA can remain in the
/// start state that it was in when it ran the prefilter. Why? Because in that
/// case, there is only one start state.
///
/// When does a DFA have a universal start state? In precisely the cases where
/// it has no look-around assertions in its prefix. So for example, `\bfoo`
/// does not have a universal start state because the start state depends on
/// whether the byte immediately before the start position is a word byte or
/// not. However, `foo\b` does have a universal start state because the word
/// boundary does not appear in the pattern's prefix.
///
/// So... most cases don't need this, but when a pattern doesn't have a
/// universal start state, then after a prefilter candidate has been found,
/// the current state *must* be re-litigated as if computing the start state
/// at the beginning of the search, because it might change. That is, not all
/// start states are created equal.
///
/// Why avoid it? Because while it's not super expensive, it isn't a trivial
/// operation to compute the start state. It is much better to avoid it and
/// just stay in the current state if you know it to be correct.
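///
/// A minimal sketch of the caller-side pattern (as it appears in
/// `find_fwd_imp` above):
///
/// ```text
/// // Computed once, before the search loop:
/// let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
/// // ...and then, after a prefilter candidate is found at 'at':
/// if !universal_start {
///     sid = prefilter_restart(dfa, cache, &input, at)?;
/// }
/// ```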
#[cfg_attr(feature = "perf-inline", inline(always))]
fn prefilter_restart(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    at: usize,
) -> Result<LazyStateID, MatchError> {
    let mut input = input.clone();
    input.set_start(at);
    init_fwd(dfa, cache, &input)
}

/// A convenience routine for constructing a "gave up" match error.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn gave_up(offset: usize) -> MatchError {
    MatchError::gave_up(offset)
}