strategy.rs - Codebrowser

1	use core::{
2	fmt::Debug,
3	panic::{RefUnwindSafe, UnwindSafe},
4	};
5
6	use alloc::sync::Arc;
7
8	use regex_syntax::hir::{literal, Hir};
9
10	use crate::{
11	meta::{
12	error::{BuildError, RetryError, RetryFailError, RetryQuadraticError},
13	regex::{Cache, RegexInfo},
14	reverse_inner, wrappers,
15	},
16	nfa::thompson::{self, WhichCaptures, NFA},
17	util::{
18	captures::{Captures, GroupInfo},
19	look::LookMatcher,
20	prefilter::{self, Prefilter, PrefilterI},
21	primitives::{NonMaxUsize, PatternID},
22	search::{Anchored, HalfMatch, Input, Match, MatchKind, PatternSet},
23	},
24	};
25
26	/// A trait that represents a single meta strategy. Its main utility is in
27	/// providing a way to do dynamic dispatch over a few choices.
28	///
29	/// Why dynamic dispatch? I actually don't have a super compelling reason, and
30	/// importantly, I have not benchmarked it with the main alternative: an enum.
31	/// I went with dynamic dispatch initially because the regex engine search code
32	/// really can't be inlined into caller code in most cases because it's just
33	/// too big. In other words, it is already expected that every regex search
34	/// will entail at least the cost of a function call.
35	///
36	/// I do wonder whether using enums would result in better codegen overall
37	/// though. It's a worthwhile experiment to try. Probably the most interesting
38	/// benchmark to run in such a case would be one with a high match count. That
39	/// is, a benchmark to test the overall latency of a search call.
40	pub(super) trait Strategy:
41	Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
42	{
43	fn group_info(&self) -> &GroupInfo;
44
45	fn create_cache(&self) -> Cache;
46
47	fn reset_cache(&self, cache: &mut Cache);
48
49	fn is_accelerated(&self) -> bool;
50
51	fn memory_usage(&self) -> usize;
52
53	fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>;
54
55	fn search_half(
56	&self,
57	cache: &mut Cache,
58	input: &Input<'_>,
59	) -> Option<HalfMatch>;
60
61	fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool;
62
63	fn search_slots(
64	&self,
65	cache: &mut Cache,
66	input: &Input<'_>,
67	slots: &mut [Option<NonMaxUsize>],
68	) -> Option<PatternID>;
69
70	fn which_overlapping_matches(
71	&self,
72	cache: &mut Cache,
73	input: &Input<'_>,
74	patset: &mut PatternSet,
75	);
76	}
77
78	pub(super) fn new(
79	info: &RegexInfo,
80	hirs: &[&Hir],
81	) -> Result<Arc<dyn Strategy>, BuildError> {
82	// At this point, we're committed to a regex engine of some kind. So pull
83	// out a prefilter if we can, which will feed to each of the constituent
84	// regex engines.
85	let pre = if info.is_always_anchored_start() {
86	// PERF: I'm not sure we necessarily want to do this... We may want to
87	// run a prefilter for quickly rejecting in some cases. The problem
88	// is that anchored searches overlap quite a bit with the use case
89	// of "run a regex on every line to extract data." In that case, the
90	// regex always matches, so running a prefilter doesn't really help us
91	// there. The main place where a prefilter helps in an anchored search
92	// is if the anchored search is not expected to match frequently. That
93	// is, the prefilter gives us a way to possibly reject a haystack very
94	// quickly.
95	//
96	// Maybe we should do use a prefilter, but only for longer haystacks?
97	// Or maybe we should only use a prefilter when we think it's "fast"?
98	//
99	// Interestingly, I think we currently lack the infrastructure for
100	// disabling a prefilter based on haystack length. That would probably
101	// need to be a new 'Input' option. (Interestingly, an 'Input' used to
102	// carry a 'Prefilter' with it, but I moved away from that.)
103	debug!("skipping literal extraction since regex is anchored");
104	None
105	} else if let Some(pre) = info.config().get_prefilter() {
106	debug!(
107	"skipping literal extraction since the caller provided a prefilter"
108	);
109	Some(pre.clone())
110	} else if info.config().get_auto_prefilter() {
111	let kind = info.config().get_match_kind();
112	let prefixes = crate::util::prefilter::prefixes(kind, hirs);
113	// If we can build a full `Strategy` from just the extracted prefixes,
114	// then we can short-circuit and avoid building a regex engine at all.
115	if let Some(pre) = Pre::from_prefixes(info, &prefixes) {
116	debug!(
117	"found that the regex can be broken down to a literal \
118	search, avoiding the regex engine entirely",
119	);
120	return Ok(pre);
121	}
122	// This now attempts another short-circuit of the regex engine: if we
123	// have a huge alternation of just plain literals, then we can just use
124	// Aho-Corasick for that and avoid the regex engine entirely.
125	//
126	// You might think this case would just be handled by
127	// `Pre::from_prefixes`, but that technique relies on heuristic literal
128	// extraction from the corresponding `Hir`. That works, but part of
129	// heuristics limit the size and number of literals returned. This case
130	// will specifically handle patterns with very large alternations.
131	//
132	// One wonders if we should just roll this our heuristic literal
133	// extraction, and then I think this case could disappear entirely.
134	if let Some(pre) = Pre::from_alternation_literals(info, hirs) {
135	debug!(
136	"found plain alternation of literals, \
137	avoiding regex engine entirely and using Aho-Corasick"
138	);
139	return Ok(pre);
140	}
141	prefixes.literals().and_then(\|strings\| {
142	debug!(
143	"creating prefilter from {} literals: {:?}",
144	strings.len(),
145	strings,
146	);
147	Prefilter::new(kind, strings)
148	})
149	} else {
150	debug!("skipping literal extraction since prefilters were disabled");
151	None
152	};
153	let mut core = Core::new(info.clone(), pre.clone(), hirs)?;
154	// Now that we have our core regex engines built, there are a few cases
155	// where we can do a little bit better than just a normal "search forward
156	// and maybe use a prefilter when in a start state." However, these cases
157	// may not always work or otherwise build on top of the Core searcher.
158	// For example, the reverse anchored optimization seems like it might
159	// always work, but only the DFAs support reverse searching and the DFAs
160	// might give up or quit for reasons. If we had, e.g., a PikeVM that
161	// supported reverse searching, then we could avoid building a full Core
162	// engine for this case.
163	core = match ReverseAnchored::new(core) {
164	Err(core) => core,
165	Ok(ra) => {
166	debug!("using reverse anchored strategy");
167	return Ok(Arc::new(ra));
168	}
169	};
170	core = match ReverseSuffix::new(core, hirs) {
171	Err(core) => core,
172	Ok(rs) => {
173	debug!("using reverse suffix strategy");
174	return Ok(Arc::new(rs));
175	}
176	};
177	core = match ReverseInner::new(core, hirs) {
178	Err(core) => core,
179	Ok(ri) => {
180	debug!("using reverse inner strategy");
181	return Ok(Arc::new(ri));
182	}
183	};
184	debug!("using core strategy");
185	Ok(Arc::new(core))
186	}
187
188	#[derive(Clone, Debug)]
189	struct Pre<P> {
190	pre: P,
191	group_info: GroupInfo,
192	}
193
194	impl<P: PrefilterI> Pre<P> {
195	fn new(pre: P) -> Arc<dyn Strategy> {
196	// The only thing we support when we use prefilters directly as a
197	// strategy is the start and end of the overall match for a single
198	// pattern. In other words, exactly one implicit capturing group. Which
199	// is exactly what we use here for a GroupInfo.
200	let group_info = GroupInfo::new([[None::<&str>]]).unwrap();
201	Arc::new(Pre { pre, group_info })
202	}
203	}
204
205	// This is a little weird, but we don't actually care about the type parameter
206	// here because we're selecting which underlying prefilter to use. So we just
207	// define it on an arbitrary type.
208	impl Pre<()> {
209	/// Given a sequence of prefixes, attempt to return a full `Strategy` using
210	/// just the prefixes.
211	///
212	/// Basically, this occurs when the prefixes given not just prefixes,
213	/// but an enumeration of the entire language matched by the regular
214	/// expression.
215	///
216	/// A number of other conditions need to be true too. For example, there
217	/// can be only one pattern, the number of explicit capture groups is 0, no
218	/// look-around assertions and so on.
219	///
220	/// Note that this ignores `Config::get_auto_prefilter` because if this
221	/// returns something, then it isn't a prefilter but a matcher itself.
222	/// Therefore, it shouldn't suffer from the problems typical to prefilters
223	/// (such as a high false positive rate).
224	fn from_prefixes(
225	info: &RegexInfo,
226	prefixes: &literal::Seq,
227	) -> Option<Arc<dyn Strategy>> {
228	let kind = info.config().get_match_kind();
229	// Check to see if our prefixes are exact, which means we might be
230	// able to bypass the regex engine entirely and just rely on literal
231	// searches.
232	if !prefixes.is_exact() {
233	return None;
234	}
235	// We also require that we have a single regex pattern. Namely,
236	// we reuse the prefilter infrastructure to implement search and
237	// prefilters only report spans. Prefilters don't know about pattern
238	// IDs. The multi-regex case isn't a lost cause, we might still use
239	// Aho-Corasick and we might still just use a regular prefilter, but
240	// that's done below.
241	if info.pattern_len() != `1` {
242	return None;
243	}
244	// We can't have any capture groups either. The literal engines don't
245	// know how to deal with things like '(foo)(bar)'. In that case, a
246	// prefilter will just be used and then the regex engine will resolve
247	// the capture groups.
248	if info.props()[`0`].explicit_captures_len() != `0` {
249	return None;
250	}
251	// We also require that it has zero look-around assertions. Namely,
252	// literal extraction treats look-around assertions as if they match
253	// every* empty string. But of course, that isn't true. So for*
254	// example, 'foo\bquux' never matches anything, but 'fooquux' is
255	// extracted from that as an exact literal. Such cases should just run
256	// the regex engine. 'fooquux' will be used as a normal prefilter, and
257	// then the regex engine will try to look for an actual match.
258	if !info.props()[`0`].look_set().is_empty() {
259	return None;
260	}
261	// Finally, currently, our prefilters are all oriented around
262	// leftmost-first match semantics, so don't try to use them if the
263	// caller asked for anything else.
264	if kind != MatchKind::LeftmostFirst {
265	return None;
266	}
267	// The above seems like a lot of requirements to meet, but it applies
268	// to a lot of cases. 'foo', '[abc][123]' and 'foo\|bar\|quux' all meet
269	// the above criteria, for example.
270	//
271	// Note that this is effectively a latency optimization. If we didn't
272	// do this, then the extracted literals would still get bundled into
273	// a prefilter, and every regex engine capable of running unanchored
274	// searches supports prefilters. So this optimization merely sidesteps
275	// having to run the regex engine at all to confirm the match. Thus, it
276	// decreases the latency of a match.
277
278	// OK because we know the set is exact and thus finite.
279	let prefixes = prefixes.literals().unwrap();
280	debug!(
281	"trying to bypass regex engine by creating \
282	prefilter from {} literals: {:?}",
283	prefixes.len(),
284	prefixes,
285	);
286	let choice = match prefilter::Choice::new(kind, prefixes) {
287	Some(choice) => choice,
288	None => {
289	debug!(
290	"regex bypass failed because no prefilter could be built"
291	);
292	return None;
293	}
294	};
295	let strat: Arc<dyn Strategy> = match choice {
296	prefilter::Choice::Memchr(pre) => Pre::new(pre),
297	prefilter::Choice::Memchr2(pre) => Pre::new(pre),
298	prefilter::Choice::Memchr3(pre) => Pre::new(pre),
299	prefilter::Choice::Memmem(pre) => Pre::new(pre),
300	prefilter::Choice::Teddy(pre) => Pre::new(pre),
301	prefilter::Choice::ByteSet(pre) => Pre::new(pre),
302	prefilter::Choice::AhoCorasick(pre) => Pre::new(pre),
303	};
304	Some(strat)
305	}
306
307	/// Attempts to extract an alternation of literals, and if it's deemed
308	/// worth doing, returns an Aho-Corasick prefilter as a strategy.
309	///
310	/// And currently, this only returns something when 'hirs.len() == 1'. This
311	/// could in theory do something if there are multiple HIRs where all of
312	/// them are alternation of literals, but I haven't had the time to go down
313	/// that path yet.
314	fn from_alternation_literals(
315	info: &RegexInfo,
316	hirs: &[&Hir],
317	) -> Option<Arc<dyn Strategy>> {
318	use crate::util::prefilter::AhoCorasick;
319
320	let lits = crate::meta::literal::alternation_literals(info, hirs)?;
321	let ac = AhoCorasick::new(MatchKind::LeftmostFirst, &lits)?;
322	Some(Pre::new(ac))
323	}
324	}
325
326	// This implements Strategy for anything that implements PrefilterI.
327	//
328	// Note that this must only be used for regexes of length 1. Multi-regexes
329	// don't work here. The prefilter interface only provides the span of a match
330	// and not the pattern ID. (I did consider making it more expressive, but I
331	// couldn't figure out how to tie everything together elegantly.) Thus, so long
332	// as the regex only contains one pattern, we can simply assume that a match
333	// corresponds to PatternID::ZERO. And indeed, that's what we do here.
334	//
335	// In practice, since this impl is used to report matches directly and thus
336	// completely bypasses the regex engine, we only wind up using this under the
337	// following restrictions:
338	//
339	// There must be only one pattern. As explained above.*
340	// The literal sequence must be finite and only contain exact literals.*
341	// There must not be any look-around assertions. If there are, the literals*
342	// extracted might be exact, but a match doesn't necessarily imply an overall
343	// match. As a trivial example, 'foo\bbar' does not match 'foobar'.
344	// The pattern must not have any explicit capturing groups. If it does, the*
345	// caller might expect them to be resolved. e.g., 'foo(bar)'.
346	//
347	// So when all of those things are true, we use a prefilter directly as a
348	// strategy.
349	//
350	// In the case where the number of patterns is more than 1, we don't use this
351	// but do use a special Aho-Corasick strategy if all of the regexes are just
352	// simple literals or alternations of literals. (We also use the Aho-Corasick
353	// strategy when len(patterns)==1 if the number of literals is large. In that
354	// case, literal extraction gives up and will return an infinite set.)
355	impl<P: PrefilterI> Strategy for Pre<P> {
356	#[cfg_attr(feature = "perf-inline", inline(always))]
357	fn group_info(&self) -> &GroupInfo {
358	&self.group_info
359	}
360
361	fn create_cache(&self) -> Cache {
362	Cache {
363	capmatches: Captures::all(self.group_info().clone()),
364	pikevm: wrappers::PikeVMCache::none(),
365	backtrack: wrappers::BoundedBacktrackerCache::none(),
366	onepass: wrappers::OnePassCache::none(),
367	hybrid: wrappers::HybridCache::none(),
368	revhybrid: wrappers::ReverseHybridCache::none(),
369	}
370	}
371
372	fn reset_cache(&self, _cache: &mut Cache) {}
373
374	fn is_accelerated(&self) -> bool {
375	self.pre.is_fast()
376	}
377
378	fn memory_usage(&self) -> usize {
379	self.pre.memory_usage()
380	}
381
382	#[cfg_attr(feature = "perf-inline", inline(always))]
383	fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
384	if input.is_done() {
385	return None;
386	}
387	if input.get_anchored().is_anchored() {
388	return self
389	.pre
390	.prefix(input.haystack(), input.get_span())
391	.map(\|sp\| Match::new(PatternID::ZERO, sp));
392	}
393	self.pre
394	.find(input.haystack(), input.get_span())
395	.map(\|sp\| Match::new(PatternID::ZERO, sp))
396	}
397
398	#[cfg_attr(feature = "perf-inline", inline(always))]
399	fn search_half(
400	&self,
401	cache: &mut Cache,
402	input: &Input<'_>,
403	) -> Option<HalfMatch> {
404	self.search(cache, input).map(\|m\| HalfMatch::new(m.pattern(), m.end()))
405	}
406
407	#[cfg_attr(feature = "perf-inline", inline(always))]
408	fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
409	self.search(cache, input).is_some()
410	}
411
412	#[cfg_attr(feature = "perf-inline", inline(always))]
413	fn search_slots(
414	&self,
415	cache: &mut Cache,
416	input: &Input<'_>,
417	slots: &mut [Option<NonMaxUsize>],
418	) -> Option<PatternID> {
419	let m = self.search(cache, input)?;
420	if let Some(slot) = slots.get_mut(`0`) {
421	*slot = NonMaxUsize::new(m.start());
422	}
423	if let Some(slot) = slots.get_mut(`1`) {
424	*slot = NonMaxUsize::new(m.end());
425	}
426	Some(m.pattern())
427	}
428
429	#[cfg_attr(feature = "perf-inline", inline(always))]
430	fn which_overlapping_matches(
431	&self,
432	cache: &mut Cache,
433	input: &Input<'_>,
434	patset: &mut PatternSet,
435	) {
436	if self.search(cache, input).is_some() {
437	patset.insert(PatternID::ZERO);
438	}
439	}
440	}
441
442	#[derive(Debug)]
443	struct Core {
444	info: RegexInfo,
445	pre: Option<Prefilter>,
446	nfa: NFA,
447	nfarev: Option<NFA>,
448	pikevm: wrappers::PikeVM,
449	backtrack: wrappers::BoundedBacktracker,
450	onepass: wrappers::OnePass,
451	hybrid: wrappers::Hybrid,
452	dfa: wrappers::DFA,
453	}
454
455	impl Core {
456	fn new(
457	info: RegexInfo,
458	pre: Option<Prefilter>,
459	hirs: &[&Hir],
460	) -> Result<Core, BuildError> {
461	let mut lookm = LookMatcher::new();
462	lookm.set_line_terminator(info.config().get_line_terminator());
463	let thompson_config = thompson::Config::new()
464	.utf8(info.config().get_utf8_empty())
465	.nfa_size_limit(info.config().get_nfa_size_limit())
466	.shrink(`false`)
467	.which_captures(info.config().get_which_captures())
468	.look_matcher(lookm);
469	let nfa = thompson::Compiler::new()
470	.configure(thompson_config.clone())
471	.build_many_from_hir(hirs)
472	.map_err(BuildError::nfa)?;
473	// It's possible for the PikeVM or the BB to fail to build, even though
474	// at this point, we already have a full NFA in hand. They can fail
475	// when a Unicode word boundary is used but where Unicode word boundary
476	// support is disabled at compile time, thus making it impossible to
477	// match. (Construction can also fail if the NFA was compiled without
478	// captures, but we always enable that above.)
479	let pikevm = wrappers::PikeVM::new(&info, pre.clone(), &nfa)?;
480	let backtrack =
481	wrappers::BoundedBacktracker::new(&info, pre.clone(), &nfa)?;
482	// The onepass engine can of course fail to build, but we expect it to
483	// fail in many cases because it is an optimization that doesn't apply
484	// to all regexes. The 'OnePass' wrapper encapsulates this failure (and
485	// logs a message if it occurs).
486	let onepass = wrappers::OnePass::new(&info, &nfa);
487	// We try to encapsulate whether a particular regex engine should be
488	// used within each respective wrapper, but the DFAs need a reverse NFA
489	// to build itself, and we really do not want to build a reverse NFA if
490	// we know we aren't going to use the lazy DFA. So we do a config check
491	// up front, which is in practice the only way we won't try to use the
492	// DFA.
493	let (nfarev, hybrid, dfa) =
494	if !info.config().get_hybrid() && !info.config().get_dfa() {
495	(None, wrappers::Hybrid::none(), wrappers::DFA::none())
496	} else {
497	// FIXME: Technically, we don't quite yet KNOW that we need
498	// a reverse NFA. It's possible for the DFAs below to both
499	// fail to build just based on the forward NFA. In which case,
500	// building the reverse NFA was totally wasted work. But...
501	// fixing this requires breaking DFA construction apart into
502	// two pieces: one for the forward part and another for the
503	// reverse part. Quite annoying. Making it worse, when building
504	// both DFAs fails, it's quite likely that the NFA is large and
505	// that it will take quite some time to build the reverse NFA
506	// too. So... it's really probably worth it to do this!
507	let nfarev = thompson::Compiler::new()
508	// Currently, reverse NFAs don't support capturing groups,
509	// so we MUST disable them. But even if we didn't have to,
510	// we would, because nothing in this crate does anything
511	// useful with capturing groups in reverse. And of course,
512	// the lazy DFA ignores capturing groups in all cases.
513	.configure(
514	thompson_config
515	.clone()
516	.which_captures(WhichCaptures::None)
517	.reverse(`true`),
518	)
519	.build_many_from_hir(hirs)
520	.map_err(BuildError::nfa)?;
521	let dfa = if !info.config().get_dfa() {
522	wrappers::DFA::none()
523	} else {
524	wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
525	};
526	let hybrid = if !info.config().get_hybrid() {
527	wrappers::Hybrid::none()
528	} else if dfa.is_some() {
529	debug!("skipping lazy DFA because we have a full DFA");
530	wrappers::Hybrid::none()
531	} else {
532	wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
533	};
534	(Some(nfarev), hybrid, dfa)
535	};
536	Ok(Core {
537	info,
538	pre,
539	nfa,
540	nfarev,
541	pikevm,
542	backtrack,
543	onepass,
544	hybrid,
545	dfa,
546	})
547	}
548
549	#[cfg_attr(feature = "perf-inline", inline(always))]
550	fn try_search_mayfail(
551	&self,
552	cache: &mut Cache,
553	input: &Input<'_>,
554	) -> Option<Result<Option<Match>, RetryFailError>> {
555	if let Some(e) = self.dfa.get(input) {
556	trace!("using full DFA for search at {:?}", input.get_span());
557	Some(e.try_search(input))
558	} else if let Some(e) = self.hybrid.get(input) {
559	trace!("using lazy DFA for search at {:?}", input.get_span());
560	Some(e.try_search(&mut cache.hybrid, input))
561	} else {
562	None
563	}
564	}
565
566	fn search_nofail(
567	&self,
568	cache: &mut Cache,
569	input: &Input<'_>,
570	) -> Option<Match> {
571	let caps = &mut cache.capmatches;
572	caps.set_pattern(None);
573	// We manually inline 'try_search_slots_nofail' here because we need to
574	// borrow from 'cache.capmatches' in this method, but if we do, then
575	// we can't pass 'cache' wholesale to to 'try_slots_no_hybrid'. It's a
576	// classic example of how the borrow checker inhibits decomposition.
577	// There are of course work-arounds (more types and/or interior
578	// mutability), but that's more annoying than this IMO.
579	let pid = if let Some(ref e) = self.onepass.get(input) {
580	trace!("using OnePass for search at {:?}", input.get_span());
581	e.search_slots(&mut cache.onepass, input, caps.slots_mut())
582	} else if let Some(ref e) = self.backtrack.get(input) {
583	trace!(
584	"using BoundedBacktracker for search at {:?}",
585	input.get_span()
586	);
587	e.search_slots(&mut cache.backtrack, input, caps.slots_mut())
588	} else {
589	trace!("using PikeVM for search at {:?}", input.get_span());
590	let e = self.pikevm.get();
591	e.search_slots(&mut cache.pikevm, input, caps.slots_mut())
592	};
593	caps.set_pattern(pid);
594	caps.get_match()
595	}
596
597	fn search_half_nofail(
598	&self,
599	cache: &mut Cache,
600	input: &Input<'_>,
601	) -> Option<HalfMatch> {
602	// Only the lazy/full DFA returns half-matches, since the DFA requires
603	// a reverse scan to find the start position. These fallback regex
604	// engines can find the start and end in a single pass, so we just do
605	// that and throw away the start offset to conform to the API.
606	let m = self.search_nofail(cache, input)?;
607	Some(HalfMatch::new(m.pattern(), m.end()))
608	}
609
610	fn search_slots_nofail(
611	&self,
612	cache: &mut Cache,
613	input: &Input<'_>,
614	slots: &mut [Option<NonMaxUsize>],
615	) -> Option<PatternID> {
616	if let Some(ref e) = self.onepass.get(input) {
617	trace!(
618	"using OnePass for capture search at {:?}",
619	input.get_span()
620	);
621	e.search_slots(&mut cache.onepass, input, slots)
622	} else if let Some(ref e) = self.backtrack.get(input) {
623	trace!(
624	"using BoundedBacktracker for capture search at {:?}",
625	input.get_span()
626	);
627	e.search_slots(&mut cache.backtrack, input, slots)
628	} else {
629	trace!(
630	"using PikeVM for capture search at {:?}",
631	input.get_span()
632	);
633	let e = self.pikevm.get();
634	e.search_slots(&mut cache.pikevm, input, slots)
635	}
636	}
637
638	fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
639	if let Some(ref e) = self.onepass.get(input) {
640	trace!(
641	"using OnePass for is-match search at {:?}",
642	input.get_span()
643	);
644	e.search_slots(&mut cache.onepass, input, &mut []).is_some()
645	} else if let Some(ref e) = self.backtrack.get(input) {
646	trace!(
647	"using BoundedBacktracker for is-match search at {:?}",
648	input.get_span()
649	);
650	e.is_match(&mut cache.backtrack, input)
651	} else {
652	trace!(
653	"using PikeVM for is-match search at {:?}",
654	input.get_span()
655	);
656	let e = self.pikevm.get();
657	e.is_match(&mut cache.pikevm, input)
658	}
659	}
660
661	fn is_capture_search_needed(&self, slots_len: usize) -> bool {
662	slots_len > self.nfa.group_info().implicit_slot_len()
663	}
664	}
665
666	impl Strategy for Core {
667	#[cfg_attr(feature = "perf-inline", inline(always))]
668	fn group_info(&self) -> &GroupInfo {
669	self.nfa.group_info()
670	}
671
672	#[cfg_attr(feature = "perf-inline", inline(always))]
673	fn create_cache(&self) -> Cache {
674	Cache {
675	capmatches: Captures::all(self.group_info().clone()),
676	pikevm: self.pikevm.create_cache(),
677	backtrack: self.backtrack.create_cache(),
678	onepass: self.onepass.create_cache(),
679	hybrid: self.hybrid.create_cache(),
680	revhybrid: wrappers::ReverseHybridCache::none(),
681	}
682	}
683
684	#[cfg_attr(feature = "perf-inline", inline(always))]
685	fn reset_cache(&self, cache: &mut Cache) {
686	cache.pikevm.reset(&self.pikevm);
687	cache.backtrack.reset(&self.backtrack);
688	cache.onepass.reset(&self.onepass);
689	cache.hybrid.reset(&self.hybrid);
690	}
691
692	fn is_accelerated(&self) -> bool {
693	self.pre.as_ref().map_or(`false`, \|pre\| pre.is_fast())
694	}
695
696	fn memory_usage(&self) -> usize {
697	self.info.memory_usage()
698	+ self.pre.as_ref().map_or(`0`, \|pre\| pre.memory_usage())
699	+ self.nfa.memory_usage()
700	+ self.nfarev.as_ref().map_or(`0`, \|nfa\| nfa.memory_usage())
701	+ self.onepass.memory_usage()
702	+ self.dfa.memory_usage()
703	}
704
705	#[cfg_attr(feature = "perf-inline", inline(always))]
706	fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
707	// We manually inline try_search_mayfail here because letting the
708	// compiler do it seems to produce pretty crappy codegen.
709	return if let Some(e) = self.dfa.get(input) {
710	trace!("using full DFA for full search at {:?}", input.get_span());
711	match e.try_search(input) {
712	Ok(x) => x,
713	Err(_err) => {
714	trace!("full DFA search failed: {}", _err);
715	self.search_nofail(cache, input)
716	}
717	}
718	} else if let Some(e) = self.hybrid.get(input) {
719	trace!("using lazy DFA for full search at {:?}", input.get_span());
720	match e.try_search(&mut cache.hybrid, input) {
721	Ok(x) => x,
722	Err(_err) => {
723	trace!("lazy DFA search failed: {}", _err);
724	self.search_nofail(cache, input)
725	}
726	}
727	} else {
728	self.search_nofail(cache, input)
729	};
730	}
731
732	#[cfg_attr(feature = "perf-inline", inline(always))]
733	fn search_half(
734	&self,
735	cache: &mut Cache,
736	input: &Input<'_>,
737	) -> Option<HalfMatch> {
738	// The main difference with 'search' is that if we're using a DFA, we
739	// can use a single forward scan without needing to run the reverse
740	// DFA.
741	if let Some(e) = self.dfa.get(input) {
742	trace!("using full DFA for half search at {:?}", input.get_span());
743	match e.try_search_half_fwd(input) {
744	Ok(x) => x,
745	Err(_err) => {
746	trace!("full DFA half search failed: {}", _err);
747	self.search_half_nofail(cache, input)
748	}
749	}
750	} else if let Some(e) = self.hybrid.get(input) {
751	trace!("using lazy DFA for half search at {:?}", input.get_span());
752	match e.try_search_half_fwd(&mut cache.hybrid, input) {
753	Ok(x) => x,
754	Err(_err) => {
755	trace!("lazy DFA half search failed: {}", _err);
756	self.search_half_nofail(cache, input)
757	}
758	}
759	} else {
760	self.search_half_nofail(cache, input)
761	}
762	}
763
764	#[cfg_attr(feature = "perf-inline", inline(always))]
765	fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
766	if let Some(e) = self.dfa.get(input) {
767	trace!(
768	"using full DFA for is-match search at {:?}",
769	input.get_span()
770	);
771	match e.try_search_half_fwd(input) {
772	Ok(x) => x.is_some(),
773	Err(_err) => {
774	trace!("full DFA half search failed: {}", _err);
775	self.is_match_nofail(cache, input)
776	}
777	}
778	} else if let Some(e) = self.hybrid.get(input) {
779	trace!(
780	"using lazy DFA for is-match search at {:?}",
781	input.get_span()
782	);
783	match e.try_search_half_fwd(&mut cache.hybrid, input) {
784	Ok(x) => x.is_some(),
785	Err(_err) => {
786	trace!("lazy DFA half search failed: {}", _err);
787	self.is_match_nofail(cache, input)
788	}
789	}
790	} else {
791	self.is_match_nofail(cache, input)
792	}
793	}
794
795	#[cfg_attr(feature = "perf-inline", inline(always))]
796	fn search_slots(
797	&self,
798	cache: &mut Cache,
799	input: &Input<'_>,
800	slots: &mut [Option<NonMaxUsize>],
801	) -> Option<PatternID> {
802	// Even if the regex has explicit capture groups, if the caller didn't
803	// provide any explicit slots, then it doesn't make sense to try and do
804	// extra work to get offsets for those slots. Ideally the caller should
805	// realize this and not call this routine in the first place, but alas,
806	// we try to save the caller from themselves if they do.
807	if !self.is_capture_search_needed(slots.len()) {
808	trace!("asked for slots unnecessarily, trying fast path");
809	let m = self.search(cache, input)?;
810	copy_match_to_slots(m, slots);
811	return Some(m.pattern());
812	}
813	// If the onepass DFA is available for this search (which only happens
814	// when it's anchored), then skip running a fallible DFA. The onepass
815	// DFA isn't as fast as a full or lazy DFA, but it is typically quite
816	// a bit faster than the backtracker or the PikeVM. So it isn't as
817	// advantageous to try and do a full/lazy DFA scan first.
818	//
819	// We still theorize that it's better to do a full/lazy DFA scan, even
820	// when it's anchored, because it's usually much faster and permits us
821	// to say "no match" much more quickly. This does hurt the case of,
822	// say, parsing each line in a log file into capture groups, because
823	// in that case, the line always matches. So the lazy DFA scan is
824	// usually just wasted work. But, the lazy DFA is usually quite fast
825	// and doesn't cost too much here.
826	if self.onepass.get(&input).is_some() {
827	return self.search_slots_nofail(cache, &input, slots);
828	}
829	let m = match self.try_search_mayfail(cache, input) {
830	Some(Ok(Some(m))) => m,
831	Some(Ok(None)) => return None,
832	Some(Err(_err)) => {
833	trace!("fast capture search failed: {}", _err);
834	return self.search_slots_nofail(cache, input, slots);
835	}
836	None => {
837	return self.search_slots_nofail(cache, input, slots);
838	}
839	};
840	// At this point, now that we've found the bounds of the
841	// match, we need to re-run something that can resolve
842	// capturing groups. But we only need to run on it on the
843	// match bounds and not the entire haystack.
844	trace!(
845	"match found at {}..{} in capture search, \
846	using another engine to find captures",
847	m.start(),
848	m.end(),
849	);
850	let input = input
851	.clone()
852	.span(m.start()..m.end())
853	.anchored(Anchored::Pattern(m.pattern()));
854	Some(
855	self.search_slots_nofail(cache, &input, slots)
856	.expect("should find a match"),
857	)
858	}
859
860	#[cfg_attr(feature = "perf-inline", inline(always))]
861	fn which_overlapping_matches(
862	&self,
863	cache: &mut Cache,
864	input: &Input<'_>,
865	patset: &mut PatternSet,
866	) {
867	if let Some(e) = self.dfa.get(input) {
868	trace!(
869	"using full DFA for overlapping search at {:?}",
870	input.get_span()
871	);
872	let _err = match e.try_which_overlapping_matches(input, patset) {
873	Ok(()) => return,
874	Err(err) => err,
875	};
876	trace!("fast overlapping search failed: {}", _err);
877	} else if let Some(e) = self.hybrid.get(input) {
878	trace!(
879	"using lazy DFA for overlapping search at {:?}",
880	input.get_span()
881	);
882	let _err = match e.try_which_overlapping_matches(
883	&mut cache.hybrid,
884	input,
885	patset,
886	) {
887	Ok(()) => {
888	return;
889	}
890	Err(err) => err,
891	};
892	trace!("fast overlapping search failed: {}", _err);
893	}
894	trace!(
895	"using PikeVM for overlapping search at {:?}",
896	input.get_span()
897	);
898	let e = self.pikevm.get();
899	e.which_overlapping_matches(&mut cache.pikevm, input, patset)
900	}
901	}
902
903	#[derive(Debug)]
904	struct ReverseAnchored {
905	core: Core,
906	}
907
908	impl ReverseAnchored {
909	fn new(core: Core) -> Result<ReverseAnchored, Core> {
910	if !core.info.is_always_anchored_end() {
911	debug!(
912	"skipping reverse anchored optimization because \
913	the regex is not always anchored at the end"
914	);
915	return Err(core);
916	}
917	// Note that the caller can still request an anchored search even when
918	// the regex isn't anchored at the start. We detect that case in the
919	// search routines below and just fallback to the core engine. This
920	// is fine because both searches are anchored. It's just a matter of
921	// picking one. Falling back to the core engine is a little simpler,
922	// since if we used the reverse anchored approach, we'd have to add an
923	// extra check to ensure the match reported starts at the place where
924	// the caller requested the search to start.
925	if core.info.is_always_anchored_start() {
926	debug!(
927	"skipping reverse anchored optimization because \
928	the regex is also anchored at the start"
929	);
930	return Err(core);
931	}
932	// Only DFAs can do reverse searches (currently), so we need one of
933	// them in order to do this optimization. It's possible (although
934	// pretty unlikely) that we have neither and need to give up.
935	if !core.hybrid.is_some() && !core.dfa.is_some() {
936	debug!(
937	"skipping reverse anchored optimization because \
938	we don't have a lazy DFA or a full DFA"
939	);
940	return Err(core);
941	}
942	Ok(ReverseAnchored { core })
943	}
944
945	#[cfg_attr(feature = "perf-inline", inline(always))]
946	fn try_search_half_anchored_rev(
947	&self,
948	cache: &mut Cache,
949	input: &Input<'_>,
950	) -> Result<Option<HalfMatch>, RetryFailError> {
951	// We of course always want an anchored search. In theory, the
952	// underlying regex engines should automatically enable anchored
953	// searches since the regex is itself anchored, but this more clearly
954	// expresses intent and is always correct.
955	let input = input.clone().anchored(Anchored::Yes);
956	if let Some(e) = self.core.dfa.get(&input) {
957	trace!(
958	"using full DFA for reverse anchored search at {:?}",
959	input.get_span()
960	);
961	e.try_search_half_rev(&input)
962	} else if let Some(e) = self.core.hybrid.get(&input) {
963	trace!(
964	"using lazy DFA for reverse anchored search at {:?}",
965	input.get_span()
966	);
967	e.try_search_half_rev(&mut cache.hybrid, &input)
968	} else {
969	unreachable!("ReverseAnchored always has a DFA")
970	}
971	}
972	}
973
974	// Note that in this impl, we don't check that 'input.end() ==
975	// input.haystack().len()'. In particular, when that condition is false, a
976	// match is always impossible because we know that the regex is always anchored
977	// at the end (or else 'ReverseAnchored' won't be built). We don't check that
978	// here because the 'Regex' wrapper actually does that for us in all cases.
979	// Thus, in this impl, we can actually assume that the end position in 'input'
980	// is equivalent to the length of the haystack.
981	impl Strategy for ReverseAnchored {
982	#[cfg_attr(feature = "perf-inline", inline(always))]
983	fn group_info(&self) -> &GroupInfo {
984	self.core.group_info()
985	}
986
987	#[cfg_attr(feature = "perf-inline", inline(always))]
988	fn create_cache(&self) -> Cache {
989	self.core.create_cache()
990	}
991
992	#[cfg_attr(feature = "perf-inline", inline(always))]
993	fn reset_cache(&self, cache: &mut Cache) {
994	self.core.reset_cache(cache);
995	}
996
997	fn is_accelerated(&self) -> bool {
998	// Since this is anchored at the end, a reverse anchored search is
999	// almost certainly guaranteed to result in a much faster search than
1000	// a standard forward search.
1001	`true`
1002	}
1003
1004	fn memory_usage(&self) -> usize {
1005	self.core.memory_usage()
1006	}
1007
1008	#[cfg_attr(feature = "perf-inline", inline(always))]
1009	fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1010	if input.get_anchored().is_anchored() {
1011	return self.core.search(cache, input);
1012	}
1013	match self.try_search_half_anchored_rev(cache, input) {
1014	Err(_err) => {
1015	trace!("fast reverse anchored search failed: {}", _err);
1016	self.core.search_nofail(cache, input)
1017	}
1018	Ok(None) => None,
1019	Ok(Some(hm)) => {
1020	Some(Match::new(hm.pattern(), hm.offset()..input.end()))
1021	}
1022	}
1023	}
1024
1025	#[cfg_attr(feature = "perf-inline", inline(always))]
1026	fn search_half(
1027	&self,
1028	cache: &mut Cache,
1029	input: &Input<'_>,
1030	) -> Option<HalfMatch> {
1031	if input.get_anchored().is_anchored() {
1032	return self.core.search_half(cache, input);
1033	}
1034	match self.try_search_half_anchored_rev(cache, input) {
1035	Err(_err) => {
1036	trace!("fast reverse anchored search failed: {}", _err);
1037	self.core.search_half_nofail(cache, input)
1038	}
1039	Ok(None) => None,
1040	Ok(Some(hm)) => {
1041	// Careful here! 'try_search_half' is a forward* search that*
1042	// only cares about the end* position of a match. But*
1043	// 'hm.offset()' is actually the start of the match. So we
1044	// actually just throw that away here and, since we know we
1045	// have a match, return the only possible position at which a
1046	// match can occur: input.end().
1047	Some(HalfMatch::new(hm.pattern(), input.end()))
1048	}
1049	}
1050	}
1051
1052	#[cfg_attr(feature = "perf-inline", inline(always))]
1053	fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1054	if input.get_anchored().is_anchored() {
1055	return self.core.is_match(cache, input);
1056	}
1057	match self.try_search_half_anchored_rev(cache, input) {
1058	Err(_err) => {
1059	trace!("fast reverse anchored search failed: {}", _err);
1060	self.core.is_match_nofail(cache, input)
1061	}
1062	Ok(None) => `false`,
1063	Ok(Some(_)) => `true`,
1064	}
1065	}
1066
1067	#[cfg_attr(feature = "perf-inline", inline(always))]
1068	fn search_slots(
1069	&self,
1070	cache: &mut Cache,
1071	input: &Input<'_>,
1072	slots: &mut [Option<NonMaxUsize>],
1073	) -> Option<PatternID> {
1074	if input.get_anchored().is_anchored() {
1075	return self.core.search_slots(cache, input, slots);
1076	}
1077	match self.try_search_half_anchored_rev(cache, input) {
1078	Err(_err) => {
1079	trace!("fast reverse anchored search failed: {}", _err);
1080	self.core.search_slots_nofail(cache, input, slots)
1081	}
1082	Ok(None) => None,
1083	Ok(Some(hm)) => {
1084	if !self.core.is_capture_search_needed(slots.len()) {
1085	trace!("asked for slots unnecessarily, skipping captures");
1086	let m = Match::new(hm.pattern(), hm.offset()..input.end());
1087	copy_match_to_slots(m, slots);
1088	return Some(m.pattern());
1089	}
1090	let start = hm.offset();
1091	let input = input
1092	.clone()
1093	.span(start..input.end())
1094	.anchored(Anchored::Pattern(hm.pattern()));
1095	self.core.search_slots_nofail(cache, &input, slots)
1096	}
1097	}
1098	}
1099
1100	#[cfg_attr(feature = "perf-inline", inline(always))]
1101	fn which_overlapping_matches(
1102	&self,
1103	cache: &mut Cache,
1104	input: &Input<'_>,
1105	patset: &mut PatternSet,
1106	) {
1107	// It seems like this could probably benefit from a reverse anchored
1108	// optimization, perhaps by doing an overlapping reverse search (which
1109	// the DFAs do support). I haven't given it much thought though, and
1110	// I'm currently focus more on the single pattern case.
1111	self.core.which_overlapping_matches(cache, input, patset)
1112	}
1113	}
1114
1115	#[derive(Debug)]
1116	struct ReverseSuffix {
1117	core: Core,
1118	pre: Prefilter,
1119	}
1120
1121	impl ReverseSuffix {
1122	fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseSuffix, Core> {
1123	if !core.info.config().get_auto_prefilter() {
1124	debug!(
1125	"skipping reverse suffix optimization because \
1126	automatic prefilters are disabled"
1127	);
1128	return Err(core);
1129	}
1130	// Like the reverse inner optimization, we don't do this for regexes
1131	// that are always anchored. It could lead to scanning too much, but
1132	// could say "no match" much more quickly than running the regex
1133	// engine if the initial literal scan doesn't match. With that said,
1134	// the reverse suffix optimization has lower overhead, since it only
1135	// requires a reverse scan after a literal match to confirm or reject
1136	// the match. (Although, in the case of confirmation, it then needs to
1137	// do another forward scan to find the end position.)
1138	//
1139	// Note that the caller can still request an anchored search even
1140	// when the regex isn't anchored. We detect that case in the search
1141	// routines below and just fallback to the core engine. Currently this
1142	// optimization assumes all searches are unanchored, so if we do want
1143	// to enable this optimization for anchored searches, it will need a
1144	// little work to support it.
1145	if core.info.is_always_anchored_start() {
1146	debug!(
1147	"skipping reverse suffix optimization because \
1148	the regex is always anchored at the start",
1149	);
1150	return Err(core);
1151	}
1152	// Only DFAs can do reverse searches (currently), so we need one of
1153	// them in order to do this optimization. It's possible (although
1154	// pretty unlikely) that we have neither and need to give up.
1155	if !core.hybrid.is_some() && !core.dfa.is_some() {
1156	debug!(
1157	"skipping reverse suffix optimization because \
1158	we don't have a lazy DFA or a full DFA"
1159	);
1160	return Err(core);
1161	}
1162	if core.pre.as_ref().map_or(`false`, \|p\| p.is_fast()) {
1163	debug!(
1164	"skipping reverse suffix optimization because \
1165	we already have a prefilter that we think is fast"
1166	);
1167	return Err(core);
1168	}
1169	let kind = core.info.config().get_match_kind();
1170	let suffixes = crate::util::prefilter::suffixes(kind, hirs);
1171	let lcs = match suffixes.longest_common_suffix() {
1172	None => {
1173	debug!(
1174	"skipping reverse suffix optimization because \
1175	a longest common suffix could not be found",
1176	);
1177	return Err(core);
1178	}
1179	Some(lcs) if lcs.is_empty() => {
1180	debug!(
1181	"skipping reverse suffix optimization because \
1182	the longest common suffix is the empty string",
1183	);
1184	return Err(core);
1185	}
1186	Some(lcs) => lcs,
1187	};
1188	let pre = match Prefilter::new(kind, &[lcs]) {
1189	Some(pre) => pre,
1190	None => {
1191	debug!(
1192	"skipping reverse suffix optimization because \
1193	a prefilter could not be constructed from the \
1194	longest common suffix",
1195	);
1196	return Err(core);
1197	}
1198	};
1199	if !pre.is_fast() {
1200	debug!(
1201	"skipping reverse suffix optimization because \
1202	while we have a suffix prefilter, it is not \
1203	believed to be 'fast'"
1204	);
1205	return Err(core);
1206	}
1207	Ok(ReverseSuffix { core, pre })
1208	}
1209
1210	#[cfg_attr(feature = "perf-inline", inline(always))]
1211	fn try_search_half_start(
1212	&self,
1213	cache: &mut Cache,
1214	input: &Input<'_>,
1215	) -> Result<Option<HalfMatch>, RetryError> {
1216	let mut span = input.get_span();
1217	let mut min_start = `0`;
1218	loop {
1219	let litmatch = match self.pre.find(input.haystack(), span) {
1220	None => return Ok(None),
1221	Some(span) => span,
1222	};
1223	trace!("reverse suffix scan found suffix match at {:?}", litmatch);
1224	let revinput = input
1225	.clone()
1226	.anchored(Anchored::Yes)
1227	.span(input.start()..litmatch.end);
1228	match self
1229	.try_search_half_rev_limited(cache, &revinput, min_start)?
1230	{
1231	None => {
1232	if span.start >= span.end {
1233	break;
1234	}
1235	span.start = litmatch.start.checked_add(`1`).unwrap();
1236	}
1237	Some(hm) => return Ok(Some(hm)),
1238	}
1239	min_start = litmatch.end;
1240	}
1241	Ok(None)
1242	}
1243
1244	#[cfg_attr(feature = "perf-inline", inline(always))]
1245	fn try_search_half_fwd(
1246	&self,
1247	cache: &mut Cache,
1248	input: &Input<'_>,
1249	) -> Result<Option<HalfMatch>, RetryFailError> {
1250	if let Some(e) = self.core.dfa.get(&input) {
1251	trace!(
1252	"using full DFA for forward reverse suffix search at {:?}",
1253	input.get_span()
1254	);
1255	e.try_search_half_fwd(&input)
1256	} else if let Some(e) = self.core.hybrid.get(&input) {
1257	trace!(
1258	"using lazy DFA for forward reverse suffix search at {:?}",
1259	input.get_span()
1260	);
1261	e.try_search_half_fwd(&mut cache.hybrid, &input)
1262	} else {
1263	unreachable!("ReverseSuffix always has a DFA")
1264	}
1265	}
1266
1267	#[cfg_attr(feature = "perf-inline", inline(always))]
1268	fn try_search_half_rev_limited(
1269	&self,
1270	cache: &mut Cache,
1271	input: &Input<'_>,
1272	min_start: usize,
1273	) -> Result<Option<HalfMatch>, RetryError> {
1274	if let Some(e) = self.core.dfa.get(&input) {
1275	trace!(
1276	"using full DFA for reverse suffix search at {:?}, \
1277	but will be stopped at {} to avoid quadratic behavior",
1278	input.get_span(),
1279	min_start,
1280	);
1281	e.try_search_half_rev_limited(&input, min_start)
1282	} else if let Some(e) = self.core.hybrid.get(&input) {
1283	trace!(
1284	"using lazy DFA for reverse suffix search at {:?}, \
1285	but will be stopped at {} to avoid quadratic behavior",
1286	input.get_span(),
1287	min_start,
1288	);
1289	e.try_search_half_rev_limited(&mut cache.hybrid, &input, min_start)
1290	} else {
1291	unreachable!("ReverseSuffix always has a DFA")
1292	}
1293	}
1294	}
1295
1296	impl Strategy for ReverseSuffix {
1297	#[cfg_attr(feature = "perf-inline", inline(always))]
1298	fn group_info(&self) -> &GroupInfo {
1299	self.core.group_info()
1300	}
1301
1302	#[cfg_attr(feature = "perf-inline", inline(always))]
1303	fn create_cache(&self) -> Cache {
1304	self.core.create_cache()
1305	}
1306
1307	#[cfg_attr(feature = "perf-inline", inline(always))]
1308	fn reset_cache(&self, cache: &mut Cache) {
1309	self.core.reset_cache(cache);
1310	}
1311
1312	fn is_accelerated(&self) -> bool {
1313	self.pre.is_fast()
1314	}
1315
1316	fn memory_usage(&self) -> usize {
1317	self.core.memory_usage() + self.pre.memory_usage()
1318	}
1319
1320	#[cfg_attr(feature = "perf-inline", inline(always))]
1321	fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1322	if input.get_anchored().is_anchored() {
1323	return self.core.search(cache, input);
1324	}
1325	match self.try_search_half_start(cache, input) {
1326	Err(RetryError::Quadratic(_err)) => {
1327	trace!("reverse suffix optimization failed: {}", _err);
1328	self.core.search(cache, input)
1329	}
1330	Err(RetryError::Fail(_err)) => {
1331	trace!("reverse suffix reverse fast search failed: {}", _err);
1332	self.core.search_nofail(cache, input)
1333	}
1334	Ok(None) => None,
1335	Ok(Some(hm_start)) => {
1336	let fwdinput = input
1337	.clone()
1338	.anchored(Anchored::Pattern(hm_start.pattern()))
1339	.span(hm_start.offset()..input.end());
1340	match self.try_search_half_fwd(cache, &fwdinput) {
1341	Err(_err) => {
1342	trace!(
1343	"reverse suffix forward fast search failed: {}",
1344	_err
1345	);
1346	self.core.search_nofail(cache, input)
1347	}
1348	Ok(None) => {
1349	unreachable!(
1350	"suffix match plus reverse match implies \
1351	there must be a match",
1352	)
1353	}
1354	Ok(Some(hm_end)) => Some(Match::new(
1355	hm_start.pattern(),
1356	hm_start.offset()..hm_end.offset(),
1357	)),
1358	}
1359	}
1360	}
1361	}
1362
1363	#[cfg_attr(feature = "perf-inline", inline(always))]
1364	fn search_half(
1365	&self,
1366	cache: &mut Cache,
1367	input: &Input<'_>,
1368	) -> Option<HalfMatch> {
1369	if input.get_anchored().is_anchored() {
1370	return self.core.search_half(cache, input);
1371	}
1372	match self.try_search_half_start(cache, input) {
1373	Err(RetryError::Quadratic(_err)) => {
1374	trace!("reverse suffix half optimization failed: {}", _err);
1375	self.core.search_half(cache, input)
1376	}
1377	Err(RetryError::Fail(_err)) => {
1378	trace!(
1379	"reverse suffix reverse fast half search failed: {}",
1380	_err
1381	);
1382	self.core.search_half_nofail(cache, input)
1383	}
1384	Ok(None) => None,
1385	Ok(Some(hm_start)) => {
1386	// This is a bit subtle. It is tempting to just stop searching
1387	// at this point and return a half-match with an offset
1388	// corresponding to where the suffix was found. But the suffix
1389	// match does not necessarily correspond to the end of the
1390	// proper leftmost-first match. Consider /[a-z]+ing/ against
1391	// 'tingling'. The first suffix match is the first 'ing', and
1392	// the /[a-z]+/ matches the 't'. So if we stopped here, then
1393	// we'd report 'ting' as the match. But 'tingling' is the
1394	// correct match because of greediness.
1395	let fwdinput = input
1396	.clone()
1397	.anchored(Anchored::Pattern(hm_start.pattern()))
1398	.span(hm_start.offset()..input.end());
1399	match self.try_search_half_fwd(cache, &fwdinput) {
1400	Err(_err) => {
1401	trace!(
1402	"reverse suffix forward fast search failed: {}",
1403	_err
1404	);
1405	self.core.search_half_nofail(cache, input)
1406	}
1407	Ok(None) => {
1408	unreachable!(
1409	"suffix match plus reverse match implies \
1410	there must be a match",
1411	)
1412	}
1413	Ok(Some(hm_end)) => Some(hm_end),
1414	}
1415	}
1416	}
1417	}
1418
1419	#[cfg_attr(feature = "perf-inline", inline(always))]
1420	fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1421	if input.get_anchored().is_anchored() {
1422	return self.core.is_match(cache, input);
1423	}
1424	match self.try_search_half_start(cache, input) {
1425	Err(RetryError::Quadratic(_err)) => {
1426	trace!("reverse suffix half optimization failed: {}", _err);
1427	self.core.is_match_nofail(cache, input)
1428	}
1429	Err(RetryError::Fail(_err)) => {
1430	trace!(
1431	"reverse suffix reverse fast half search failed: {}",
1432	_err
1433	);
1434	self.core.is_match_nofail(cache, input)
1435	}
1436	Ok(None) => `false`,
1437	Ok(Some(_)) => `true`,
1438	}
1439	}
1440
1441	#[cfg_attr(feature = "perf-inline", inline(always))]
1442	fn search_slots(
1443	&self,
1444	cache: &mut Cache,
1445	input: &Input<'_>,
1446	slots: &mut [Option<NonMaxUsize>],
1447	) -> Option<PatternID> {
1448	if input.get_anchored().is_anchored() {
1449	return self.core.search_slots(cache, input, slots);
1450	}
1451	if !self.core.is_capture_search_needed(slots.len()) {
1452	trace!("asked for slots unnecessarily, trying fast path");
1453	let m = self.search(cache, input)?;
1454	copy_match_to_slots(m, slots);
1455	return Some(m.pattern());
1456	}
1457	let hm_start = match self.try_search_half_start(cache, input) {
1458	Err(RetryError::Quadratic(_err)) => {
1459	trace!(
1460	"reverse suffix captures optimization failed: {}",
1461	_err
1462	);
1463	return self.core.search_slots(cache, input, slots);
1464	}
1465	Err(RetryError::Fail(_err)) => {
1466	trace!(
1467	"reverse suffix reverse fast captures search failed: {}",
1468	_err
1469	);
1470	return self.core.search_slots_nofail(cache, input, slots);
1471	}
1472	Ok(None) => return None,
1473	Ok(Some(hm_start)) => hm_start,
1474	};
1475	trace!(
1476	"match found at {}..{} in capture search, \
1477	using another engine to find captures",
1478	hm_start.offset(),
1479	input.end(),
1480	);
1481	let start = hm_start.offset();
1482	let input = input
1483	.clone()
1484	.span(start..input.end())
1485	.anchored(Anchored::Pattern(hm_start.pattern()));
1486	self.core.search_slots_nofail(cache, &input, slots)
1487	}
1488
1489	#[cfg_attr(feature = "perf-inline", inline(always))]
1490	fn which_overlapping_matches(
1491	&self,
1492	cache: &mut Cache,
1493	input: &Input<'_>,
1494	patset: &mut PatternSet,
1495	) {
1496	self.core.which_overlapping_matches(cache, input, patset)
1497	}
1498	}
1499
1500	#[derive(Debug)]
1501	struct ReverseInner {
1502	core: Core,
1503	preinner: Prefilter,
1504	nfarev: NFA,
1505	hybrid: wrappers::ReverseHybrid,
1506	dfa: wrappers::ReverseDFA,
1507	}
1508
1509	impl ReverseInner {
1510	fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseInner, Core> {
1511	if !core.info.config().get_auto_prefilter() {
1512	debug!(
1513	"skipping reverse inner optimization because \
1514	automatic prefilters are disabled"
1515	);
1516	return Err(core);
1517	}
1518	// Currently we hard-code the assumption of leftmost-first match
1519	// semantics. This isn't a huge deal because 'all' semantics tend to
1520	// only be used for forward overlapping searches with multiple regexes,
1521	// and this optimization only supports a single pattern at the moment.
1522	if core.info.config().get_match_kind() != MatchKind::LeftmostFirst {
1523	debug!(
1524	"skipping reverse inner optimization because \
1525	match kind is {:?} but this only supports leftmost-first",
1526	core.info.config().get_match_kind(),
1527	);
1528	return Err(core);
1529	}
1530	// It's likely that a reverse inner scan has too much overhead for it
1531	// to be worth it when the regex is anchored at the start. It is
1532	// possible for it to be quite a bit faster if the initial literal
1533	// scan fails to detect a match, in which case, we can say "no match"
1534	// very quickly. But this could be undesirable, e.g., scanning too far
1535	// or when the literal scan matches. If it matches, then confirming the
1536	// match requires a reverse scan followed by a forward scan to confirm
1537	// or reject, which is a fair bit of work.
1538	//
1539	// Note that the caller can still request an anchored search even
1540	// when the regex isn't anchored. We detect that case in the search
1541	// routines below and just fallback to the core engine. Currently this
1542	// optimization assumes all searches are unanchored, so if we do want
1543	// to enable this optimization for anchored searches, it will need a
1544	// little work to support it.
1545	if core.info.is_always_anchored_start() {
1546	debug!(
1547	"skipping reverse inner optimization because \
1548	the regex is always anchored at the start",
1549	);
1550	return Err(core);
1551	}
1552	// Only DFAs can do reverse searches (currently), so we need one of
1553	// them in order to do this optimization. It's possible (although
1554	// pretty unlikely) that we have neither and need to give up.
1555	if !core.hybrid.is_some() && !core.dfa.is_some() {
1556	debug!(
1557	"skipping reverse inner optimization because \
1558	we don't have a lazy DFA or a full DFA"
1559	);
1560	return Err(core);
1561	}
1562	if core.pre.as_ref().map_or(`false`, \|p\| p.is_fast()) {
1563	debug!(
1564	"skipping reverse inner optimization because \
1565	we already have a prefilter that we think is fast"
1566	);
1567	return Err(core);
1568	} else if core.pre.is_some() {
1569	debug!(
1570	"core engine has a prefix prefilter, but it is \
1571	probably not fast, so continuing with attempt to \
1572	use reverse inner prefilter"
1573	);
1574	}
1575	let (concat_prefix, preinner) = match reverse_inner::extract(hirs) {
1576	Some(x) => x,
1577	// N.B. the 'extract' function emits debug messages explaining
1578	// why we bailed out here.
1579	None => return Err(core),
1580	};
1581	debug!("building reverse NFA for prefix before inner literal");
1582	let mut lookm = LookMatcher::new();
1583	lookm.set_line_terminator(core.info.config().get_line_terminator());
1584	let thompson_config = thompson::Config::new()
1585	.reverse(`true`)
1586	.utf8(core.info.config().get_utf8_empty())
1587	.nfa_size_limit(core.info.config().get_nfa_size_limit())
1588	.shrink(`false`)
1589	.which_captures(WhichCaptures::None)
1590	.look_matcher(lookm);
1591	let result = thompson::Compiler::new()
1592	.configure(thompson_config)
1593	.build_from_hir(&concat_prefix);
1594	let nfarev = match result {
1595	Ok(nfarev) => nfarev,
1596	Err(_err) => {
1597	debug!(
1598	"skipping reverse inner optimization because the \
1599	reverse NFA failed to build: {}",
1600	_err,
1601	);
1602	return Err(core);
1603	}
1604	};
1605	debug!("building reverse DFA for prefix before inner literal");
1606	let dfa = if !core.info.config().get_dfa() {
1607	wrappers::ReverseDFA::none()
1608	} else {
1609	wrappers::ReverseDFA::new(&core.info, &nfarev)
1610	};
1611	let hybrid = if !core.info.config().get_hybrid() {
1612	wrappers::ReverseHybrid::none()
1613	} else if dfa.is_some() {
1614	debug!(
1615	"skipping lazy DFA for reverse inner optimization \
1616	because we have a full DFA"
1617	);
1618	wrappers::ReverseHybrid::none()
1619	} else {
1620	wrappers::ReverseHybrid::new(&core.info, &nfarev)
1621	};
1622	Ok(ReverseInner { core, preinner, nfarev, hybrid, dfa })
1623	}
1624
1625	#[cfg_attr(feature = "perf-inline", inline(always))]
1626	fn try_search_full(
1627	&self,
1628	cache: &mut Cache,
1629	input: &Input<'_>,
1630	) -> Result<Option<Match>, RetryError> {
1631	let mut span = input.get_span();
1632	let mut min_match_start = `0`;
1633	let mut min_pre_start = `0`;
1634	loop {
1635	let litmatch = match self.preinner.find(input.haystack(), span) {
1636	None => return Ok(None),
1637	Some(span) => span,
1638	};
1639	if litmatch.start < min_pre_start {
1640	trace!(
1641	"found inner prefilter match at {:?}, which starts \
1642	before the end of the last forward scan at {}, \
1643	quitting to avoid quadratic behavior",
1644	litmatch,
1645	min_pre_start,
1646	);
1647	return Err(RetryError::Quadratic(RetryQuadraticError::new()));
1648	}
1649	trace!("reverse inner scan found inner match at {:?}", litmatch);
1650	let revinput = input
1651	.clone()
1652	.anchored(Anchored::Yes)
1653	.span(input.start()..litmatch.start);
1654	// Note that in addition to the literal search above scanning past
1655	// our minimum start point, this routine can also return an error
1656	// as a result of detecting possible quadratic behavior if the
1657	// reverse scan goes past the minimum start point. That is, the
1658	// literal search might not, but the reverse regex search for the
1659	// prefix might!
1660	match self.try_search_half_rev_limited(
1661	cache,
1662	&revinput,
1663	min_match_start,
1664	)? {
1665	None => {
1666	if span.start >= span.end {
1667	break;
1668	}
1669	span.start = litmatch.start.checked_add(`1`).unwrap();
1670	}
1671	Some(hm_start) => {
1672	let fwdinput = input
1673	.clone()
1674	.anchored(Anchored::Pattern(hm_start.pattern()))
1675	.span(hm_start.offset()..input.end());
1676	match self.try_search_half_fwd_stopat(cache, &fwdinput)? {
1677	Err(stopat) => {
1678	min_pre_start = stopat;
1679	span.start =
1680	litmatch.start.checked_add(`1`).unwrap();
1681	}
1682	Ok(hm_end) => {
1683	return Ok(Some(Match::new(
1684	hm_start.pattern(),
1685	hm_start.offset()..hm_end.offset(),
1686	)))
1687	}
1688	}
1689	}
1690	}
1691	min_match_start = litmatch.end;
1692	}
1693	Ok(None)
1694	}
1695
1696	#[cfg_attr(feature = "perf-inline", inline(always))]
1697	fn try_search_half_fwd_stopat(
1698	&self,
1699	cache: &mut Cache,
1700	input: &Input<'_>,
1701	) -> Result<Result<HalfMatch, usize>, RetryFailError> {
1702	if let Some(e) = self.core.dfa.get(&input) {
1703	trace!(
1704	"using full DFA for forward reverse inner search at {:?}",
1705	input.get_span()
1706	);
1707	e.try_search_half_fwd_stopat(&input)
1708	} else if let Some(e) = self.core.hybrid.get(&input) {
1709	trace!(
1710	"using lazy DFA for forward reverse inner search at {:?}",
1711	input.get_span()
1712	);
1713	e.try_search_half_fwd_stopat(&mut cache.hybrid, &input)
1714	} else {
1715	unreachable!("ReverseInner always has a DFA")
1716	}
1717	}
1718
1719	#[cfg_attr(feature = "perf-inline", inline(always))]
1720	fn try_search_half_rev_limited(
1721	&self,
1722	cache: &mut Cache,
1723	input: &Input<'_>,
1724	min_start: usize,
1725	) -> Result<Option<HalfMatch>, RetryError> {
1726	if let Some(e) = self.dfa.get(&input) {
1727	trace!(
1728	"using full DFA for reverse inner search at {:?}, \
1729	but will be stopped at {} to avoid quadratic behavior",
1730	input.get_span(),
1731	min_start,
1732	);
1733	e.try_search_half_rev_limited(&input, min_start)
1734	} else if let Some(e) = self.hybrid.get(&input) {
1735	trace!(
1736	"using lazy DFA for reverse inner search at {:?}, \
1737	but will be stopped at {} to avoid quadratic behavior",
1738	input.get_span(),
1739	min_start,
1740	);
1741	e.try_search_half_rev_limited(
1742	&mut cache.revhybrid,
1743	&input,
1744	min_start,
1745	)
1746	} else {
1747	unreachable!("ReverseInner always has a DFA")
1748	}
1749	}
1750	}
1751
1752	impl Strategy for ReverseInner {
1753	#[cfg_attr(feature = "perf-inline", inline(always))]
1754	fn group_info(&self) -> &GroupInfo {
1755	self.core.group_info()
1756	}
1757
1758	#[cfg_attr(feature = "perf-inline", inline(always))]
1759	fn create_cache(&self) -> Cache {
1760	let mut cache = self.core.create_cache();
1761	cache.revhybrid = self.hybrid.create_cache();
1762	cache
1763	}
1764
1765	#[cfg_attr(feature = "perf-inline", inline(always))]
1766	fn reset_cache(&self, cache: &mut Cache) {
1767	self.core.reset_cache(cache);
1768	cache.revhybrid.reset(&self.hybrid);
1769	}
1770
1771	fn is_accelerated(&self) -> bool {
1772	self.preinner.is_fast()
1773	}
1774
1775	fn memory_usage(&self) -> usize {
1776	self.core.memory_usage()
1777	+ self.preinner.memory_usage()
1778	+ self.nfarev.memory_usage()
1779	+ self.dfa.memory_usage()
1780	}
1781
1782	#[cfg_attr(feature = "perf-inline", inline(always))]
1783	fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1784	if input.get_anchored().is_anchored() {
1785	return self.core.search(cache, input);
1786	}
1787	match self.try_search_full(cache, input) {
1788	Err(RetryError::Quadratic(_err)) => {
1789	trace!("reverse inner optimization failed: {}", _err);
1790	self.core.search(cache, input)
1791	}
1792	Err(RetryError::Fail(_err)) => {
1793	trace!("reverse inner fast search failed: {}", _err);
1794	self.core.search_nofail(cache, input)
1795	}
1796	Ok(matornot) => matornot,
1797	}
1798	}
1799
1800	#[cfg_attr(feature = "perf-inline", inline(always))]
1801	fn search_half(
1802	&self,
1803	cache: &mut Cache,
1804	input: &Input<'_>,
1805	) -> Option<HalfMatch> {
1806	if input.get_anchored().is_anchored() {
1807	return self.core.search_half(cache, input);
1808	}
1809	match self.try_search_full(cache, input) {
1810	Err(RetryError::Quadratic(_err)) => {
1811	trace!("reverse inner half optimization failed: {}", _err);
1812	self.core.search_half(cache, input)
1813	}
1814	Err(RetryError::Fail(_err)) => {
1815	trace!("reverse inner fast half search failed: {}", _err);
1816	self.core.search_half_nofail(cache, input)
1817	}
1818	Ok(None) => None,
1819	Ok(Some(m)) => Some(HalfMatch::new(m.pattern(), m.end())),
1820	}
1821	}
1822
1823	#[cfg_attr(feature = "perf-inline", inline(always))]
1824	fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1825	if input.get_anchored().is_anchored() {
1826	return self.core.is_match(cache, input);
1827	}
1828	match self.try_search_full(cache, input) {
1829	Err(RetryError::Quadratic(_err)) => {
1830	trace!("reverse inner half optimization failed: {}", _err);
1831	self.core.is_match_nofail(cache, input)
1832	}
1833	Err(RetryError::Fail(_err)) => {
1834	trace!("reverse inner fast half search failed: {}", _err);
1835	self.core.is_match_nofail(cache, input)
1836	}
1837	Ok(None) => `false`,
1838	Ok(Some(_)) => `true`,
1839	}
1840	}
1841
1842	#[cfg_attr(feature = "perf-inline", inline(always))]
1843	fn search_slots(
1844	&self,
1845	cache: &mut Cache,
1846	input: &Input<'_>,
1847	slots: &mut [Option<NonMaxUsize>],
1848	) -> Option<PatternID> {
1849	if input.get_anchored().is_anchored() {
1850	return self.core.search_slots(cache, input, slots);
1851	}
1852	if !self.core.is_capture_search_needed(slots.len()) {
1853	trace!("asked for slots unnecessarily, trying fast path");
1854	let m = self.search(cache, input)?;
1855	copy_match_to_slots(m, slots);
1856	return Some(m.pattern());
1857	}
1858	let m = match self.try_search_full(cache, input) {
1859	Err(RetryError::Quadratic(_err)) => {
1860	trace!("reverse inner captures optimization failed: {}", _err);
1861	return self.core.search_slots(cache, input, slots);
1862	}
1863	Err(RetryError::Fail(_err)) => {
1864	trace!("reverse inner fast captures search failed: {}", _err);
1865	return self.core.search_slots_nofail(cache, input, slots);
1866	}
1867	Ok(None) => return None,
1868	Ok(Some(m)) => m,
1869	};
1870	trace!(
1871	"match found at {}..{} in capture search, \
1872	using another engine to find captures",
1873	m.start(),
1874	m.end(),
1875	);
1876	let input = input
1877	.clone()
1878	.span(m.start()..m.end())
1879	.anchored(Anchored::Pattern(m.pattern()));
1880	self.core.search_slots_nofail(cache, &input, slots)
1881	}
1882
1883	#[cfg_attr(feature = "perf-inline", inline(always))]
1884	fn which_overlapping_matches(
1885	&self,
1886	cache: &mut Cache,
1887	input: &Input<'_>,
1888	patset: &mut PatternSet,
1889	) {
1890	self.core.which_overlapping_matches(cache, input, patset)
1891	}
1892	}
1893
1894	/// Copies the offsets in the given match to the corresponding positions in
1895	/// `slots`.
1896	///
1897	/// In effect, this sets the slots corresponding to the implicit group for the
1898	/// pattern in the given match. If the indices for the corresponding slots do
1899	/// not exist, then no slots are set.
1900	///
1901	/// This is useful when the caller provides slots (or captures), but you use a
1902	/// regex engine that doesn't operate on slots (like a lazy DFA). This function
1903	/// lets you map the match you get back to the slots provided by the caller.
1904	#[cfg_attr(feature = "perf-inline", inline(always))]
1905	fn copy_match_to_slots(m: Match, slots: &mut [Option<NonMaxUsize>]) {
1906	let slot_start = m.pattern().as_usize() * `2`;
1907	let slot_end = slot_start + `1`;
1908	if let Some(slot) = slots.get_mut(slot_start) {
1909	*slot = NonMaxUsize::new(m.start());
1910	}
1911	if let Some(slot) = slots.get_mut(slot_end) {
1912	*slot = NonMaxUsize::new(m.end());
1913	}
1914	}
1915