1/*!
2This module contains a boat load of wrappers around each of our internal regex
3engines. They encapsulate a few things:
4
51. The wrappers manage the conditional existence of the regex engine. Namely,
6the PikeVM is the only required regex engine. The rest are optional. These
7wrappers present a uniform API regardless of which engines are available. And
8availability might be determined by compile time features or by dynamic
9configuration via `meta::Config`. Encapsulating the conditional compilation
10features is in particular a huge simplification for the higher level code that
11composes these engines.
122. The wrappers manage construction of each engine, including skipping it if
13the engine is unavailable or configured to not be used.
143. The wrappers manage whether an engine *can* be used for a particular
15search configuration. For example, `BoundedBacktracker::get` only returns a
16backtracking engine when the haystack is bigger than the maximum supported
17length. The wrappers also sometimes take a position on when an engine *ought*
18to be used, but only in cases where the logic is extremely local to the engine
19itself. Otherwise, things like "choose between the backtracker and the one-pass
20DFA" are managed by the higher level meta strategy code.
21
22There are also corresponding wrappers for the various `Cache` types for each
23regex engine that needs them. If an engine is unavailable or not used, then a
24cache for it will *not* actually be allocated.
25*/
26
27use alloc::vec::Vec;
28
29use crate::{
30 meta::{
31 error::{BuildError, RetryError, RetryFailError},
32 regex::RegexInfo,
33 },
34 nfa::thompson::{pikevm, NFA},
35 util::{prefilter::Prefilter, primitives::NonMaxUsize},
36 HalfMatch, Input, Match, MatchKind, PatternID, PatternSet,
37};
38
39#[cfg(feature = "dfa-build")]
40use crate::dfa;
41#[cfg(feature = "dfa-onepass")]
42use crate::dfa::onepass;
43#[cfg(feature = "hybrid")]
44use crate::hybrid;
45#[cfg(feature = "nfa-backtrack")]
46use crate::nfa::thompson::backtrack;
47
48#[derive(Debug)]
49pub(crate) struct PikeVM(PikeVMEngine);
50
51impl PikeVM {
52 pub(crate) fn new(
53 info: &RegexInfo,
54 pre: Option<Prefilter>,
55 nfa: &NFA,
56 ) -> Result<PikeVM, BuildError> {
57 PikeVMEngine::new(info, pre, nfa).map(PikeVM)
58 }
59
60 pub(crate) fn create_cache(&self) -> PikeVMCache {
61 PikeVMCache::new(self)
62 }
63
64 #[cfg_attr(feature = "perf-inline", inline(always))]
65 pub(crate) fn get(&self) -> &PikeVMEngine {
66 &self.0
67 }
68}
69
70#[derive(Debug)]
71pub(crate) struct PikeVMEngine(pikevm::PikeVM);
72
73impl PikeVMEngine {
74 pub(crate) fn new(
75 info: &RegexInfo,
76 pre: Option<Prefilter>,
77 nfa: &NFA,
78 ) -> Result<PikeVMEngine, BuildError> {
79 let pikevm_config = pikevm::Config::new()
80 .match_kind(info.config().get_match_kind())
81 .prefilter(pre);
82 let engine = pikevm::Builder::new()
83 .configure(pikevm_config)
84 .build_from_nfa(nfa.clone())
85 .map_err(BuildError::nfa)?;
86 debug!("PikeVM built");
87 Ok(PikeVMEngine(engine))
88 }
89
90 #[cfg_attr(feature = "perf-inline", inline(always))]
91 pub(crate) fn is_match(
92 &self,
93 cache: &mut PikeVMCache,
94 input: &Input<'_>,
95 ) -> bool {
96 self.0.is_match(cache.0.as_mut().unwrap(), input.clone())
97 }
98
99 #[cfg_attr(feature = "perf-inline", inline(always))]
100 pub(crate) fn search_slots(
101 &self,
102 cache: &mut PikeVMCache,
103 input: &Input<'_>,
104 slots: &mut [Option<NonMaxUsize>],
105 ) -> Option<PatternID> {
106 self.0.search_slots(cache.0.as_mut().unwrap(), input, slots)
107 }
108
109 #[cfg_attr(feature = "perf-inline", inline(always))]
110 pub(crate) fn which_overlapping_matches(
111 &self,
112 cache: &mut PikeVMCache,
113 input: &Input<'_>,
114 patset: &mut PatternSet,
115 ) {
116 self.0.which_overlapping_matches(
117 cache.0.as_mut().unwrap(),
118 input,
119 patset,
120 )
121 }
122}
123
124#[derive(Clone, Debug)]
125pub(crate) struct PikeVMCache(Option<pikevm::Cache>);
126
127impl PikeVMCache {
128 pub(crate) fn none() -> PikeVMCache {
129 PikeVMCache(None)
130 }
131
132 pub(crate) fn new(builder: &PikeVM) -> PikeVMCache {
133 PikeVMCache(Some(builder.get().0.create_cache()))
134 }
135
136 pub(crate) fn reset(&mut self, builder: &PikeVM) {
137 self.0.as_mut().unwrap().reset(&builder.get().0);
138 }
139
140 pub(crate) fn memory_usage(&self) -> usize {
141 self.0.as_ref().map_or(0, |c| c.memory_usage())
142 }
143}
144
145#[derive(Debug)]
146pub(crate) struct BoundedBacktracker(Option<BoundedBacktrackerEngine>);
147
148impl BoundedBacktracker {
149 pub(crate) fn new(
150 info: &RegexInfo,
151 pre: Option<Prefilter>,
152 nfa: &NFA,
153 ) -> Result<BoundedBacktracker, BuildError> {
154 BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker)
155 }
156
157 pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache {
158 BoundedBacktrackerCache::new(self)
159 }
160
161 #[cfg_attr(feature = "perf-inline", inline(always))]
162 pub(crate) fn get(
163 &self,
164 input: &Input<'_>,
165 ) -> Option<&BoundedBacktrackerEngine> {
166 let engine = self.0.as_ref()?;
167 // It is difficult to make the backtracker give up early if it is
168 // guaranteed to eventually wind up in a match state. This is because
169 // of the greedy nature of a backtracker: it just blindly mushes
170 // forward. Every other regex engine is able to give up more quickly,
171 // so even if the backtracker might be able to zip through faster than
172 // (say) the PikeVM, we prefer the theoretical benefit that some other
173 // engine might be able to scan much less of the haystack than the
174 // backtracker.
175 //
176 // Now, if the haystack is really short already, then we allow the
177 // backtracker to run. (This hasn't been litigated quantitatively with
178 // benchmarks. Just a hunch.)
179 if input.get_earliest() && input.haystack().len() > 128 {
180 return None;
181 }
182 // If the backtracker is just going to return an error because the
183 // haystack is too long, then obviously do not use it.
184 if input.get_span().len() > engine.max_haystack_len() {
185 return None;
186 }
187 Some(engine)
188 }
189}
190
191#[derive(Debug)]
192pub(crate) struct BoundedBacktrackerEngine(
193 #[cfg(feature = "nfa-backtrack")] backtrack::BoundedBacktracker,
194 #[cfg(not(feature = "nfa-backtrack"))] (),
195);
196
197impl BoundedBacktrackerEngine {
198 pub(crate) fn new(
199 info: &RegexInfo,
200 pre: Option<Prefilter>,
201 nfa: &NFA,
202 ) -> Result<Option<BoundedBacktrackerEngine>, BuildError> {
203 #[cfg(feature = "nfa-backtrack")]
204 {
205 if !info.config().get_backtrack()
206 || info.config().get_match_kind() != MatchKind::LeftmostFirst
207 {
208 return Ok(None);
209 }
210 let backtrack_config = backtrack::Config::new().prefilter(pre);
211 let engine = backtrack::Builder::new()
212 .configure(backtrack_config)
213 .build_from_nfa(nfa.clone())
214 .map_err(BuildError::nfa)?;
215 debug!(
216 "BoundedBacktracker built (max haystack length: {:?})",
217 engine.max_haystack_len()
218 );
219 Ok(Some(BoundedBacktrackerEngine(engine)))
220 }
221 #[cfg(not(feature = "nfa-backtrack"))]
222 {
223 Ok(None)
224 }
225 }
226
227 #[cfg_attr(feature = "perf-inline", inline(always))]
228 pub(crate) fn is_match(
229 &self,
230 cache: &mut BoundedBacktrackerCache,
231 input: &Input<'_>,
232 ) -> bool {
233 #[cfg(feature = "nfa-backtrack")]
234 {
235 // OK because we only permit access to this engine when we know
236 // the haystack is short enough for the backtracker to run without
237 // reporting an error.
238 self.0
239 .try_is_match(cache.0.as_mut().unwrap(), input.clone())
240 .unwrap()
241 }
242 #[cfg(not(feature = "nfa-backtrack"))]
243 {
244 // Impossible to reach because this engine is never constructed
245 // if the requisite features aren't enabled.
246 unreachable!()
247 }
248 }
249
250 #[cfg_attr(feature = "perf-inline", inline(always))]
251 pub(crate) fn search_slots(
252 &self,
253 cache: &mut BoundedBacktrackerCache,
254 input: &Input<'_>,
255 slots: &mut [Option<NonMaxUsize>],
256 ) -> Option<PatternID> {
257 #[cfg(feature = "nfa-backtrack")]
258 {
259 // OK because we only permit access to this engine when we know
260 // the haystack is short enough for the backtracker to run without
261 // reporting an error.
262 self.0
263 .try_search_slots(cache.0.as_mut().unwrap(), input, slots)
264 .unwrap()
265 }
266 #[cfg(not(feature = "nfa-backtrack"))]
267 {
268 // Impossible to reach because this engine is never constructed
269 // if the requisite features aren't enabled.
270 unreachable!()
271 }
272 }
273
274 #[cfg_attr(feature = "perf-inline", inline(always))]
275 fn max_haystack_len(&self) -> usize {
276 #[cfg(feature = "nfa-backtrack")]
277 {
278 self.0.max_haystack_len()
279 }
280 #[cfg(not(feature = "nfa-backtrack"))]
281 {
282 // Impossible to reach because this engine is never constructed
283 // if the requisite features aren't enabled.
284 unreachable!()
285 }
286 }
287}
288
289#[derive(Clone, Debug)]
290pub(crate) struct BoundedBacktrackerCache(
291 #[cfg(feature = "nfa-backtrack")] Option<backtrack::Cache>,
292 #[cfg(not(feature = "nfa-backtrack"))] (),
293);
294
295impl BoundedBacktrackerCache {
296 pub(crate) fn none() -> BoundedBacktrackerCache {
297 #[cfg(feature = "nfa-backtrack")]
298 {
299 BoundedBacktrackerCache(None)
300 }
301 #[cfg(not(feature = "nfa-backtrack"))]
302 {
303 BoundedBacktrackerCache(())
304 }
305 }
306
307 pub(crate) fn new(
308 builder: &BoundedBacktracker,
309 ) -> BoundedBacktrackerCache {
310 #[cfg(feature = "nfa-backtrack")]
311 {
312 BoundedBacktrackerCache(
313 builder.0.as_ref().map(|e| e.0.create_cache()),
314 )
315 }
316 #[cfg(not(feature = "nfa-backtrack"))]
317 {
318 BoundedBacktrackerCache(())
319 }
320 }
321
322 pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) {
323 #[cfg(feature = "nfa-backtrack")]
324 if let Some(ref e) = builder.0 {
325 self.0.as_mut().unwrap().reset(&e.0);
326 }
327 }
328
329 pub(crate) fn memory_usage(&self) -> usize {
330 #[cfg(feature = "nfa-backtrack")]
331 {
332 self.0.as_ref().map_or(0, |c| c.memory_usage())
333 }
334 #[cfg(not(feature = "nfa-backtrack"))]
335 {
336 0
337 }
338 }
339}
340
341#[derive(Debug)]
342pub(crate) struct OnePass(Option<OnePassEngine>);
343
344impl OnePass {
345 pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass {
346 OnePass(OnePassEngine::new(info, nfa))
347 }
348
349 pub(crate) fn create_cache(&self) -> OnePassCache {
350 OnePassCache::new(self)
351 }
352
353 #[cfg_attr(feature = "perf-inline", inline(always))]
354 pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> {
355 let engine = self.0.as_ref()?;
356 if !input.get_anchored().is_anchored()
357 && !engine.get_nfa().is_always_start_anchored()
358 {
359 return None;
360 }
361 Some(engine)
362 }
363
364 pub(crate) fn memory_usage(&self) -> usize {
365 self.0.as_ref().map_or(0, |e| e.memory_usage())
366 }
367}
368
369#[derive(Debug)]
370pub(crate) struct OnePassEngine(
371 #[cfg(feature = "dfa-onepass")] onepass::DFA,
372 #[cfg(not(feature = "dfa-onepass"))] (),
373);
374
375impl OnePassEngine {
376 pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> Option<OnePassEngine> {
377 #[cfg(feature = "dfa-onepass")]
378 {
379 if !info.config().get_onepass() {
380 return None;
381 }
382 // In order to even attempt building a one-pass DFA, we require
383 // that we either have at least one explicit capturing group or
384 // there's a Unicode word boundary somewhere. If we don't have
385 // either of these things, then the lazy DFA will almost certainly
386 // be useable and be much faster. The only case where it might
387 // not is if the lazy DFA isn't utilizing its cache effectively,
388 // but in those cases, the underlying regex is almost certainly
389 // not one-pass or is too big to fit within the current one-pass
390 // implementation limits.
391 if info.props_union().explicit_captures_len() == 0
392 && !info.props_union().look_set().contains_word_unicode()
393 {
394 debug!("not building OnePass because it isn't worth it");
395 return None;
396 }
397 let onepass_config = onepass::Config::new()
398 .match_kind(info.config().get_match_kind())
399 // Like for the lazy DFA, we unconditionally enable this
400 // because it doesn't cost much and makes the API more
401 // flexible.
402 .starts_for_each_pattern(true)
403 .byte_classes(info.config().get_byte_classes())
404 .size_limit(info.config().get_onepass_size_limit());
405 let result = onepass::Builder::new()
406 .configure(onepass_config)
407 .build_from_nfa(nfa.clone());
408 let engine = match result {
409 Ok(engine) => engine,
410 Err(_err) => {
411 debug!("OnePass failed to build: {}", _err);
412 return None;
413 }
414 };
415 debug!("OnePass built, {} bytes", engine.memory_usage());
416 Some(OnePassEngine(engine))
417 }
418 #[cfg(not(feature = "dfa-onepass"))]
419 {
420 None
421 }
422 }
423
424 #[cfg_attr(feature = "perf-inline", inline(always))]
425 pub(crate) fn search_slots(
426 &self,
427 cache: &mut OnePassCache,
428 input: &Input<'_>,
429 slots: &mut [Option<NonMaxUsize>],
430 ) -> Option<PatternID> {
431 #[cfg(feature = "dfa-onepass")]
432 {
433 // OK because we only permit getting a OnePassEngine when we know
434 // the search is anchored and thus an error cannot occur.
435 self.0
436 .try_search_slots(cache.0.as_mut().unwrap(), input, slots)
437 .unwrap()
438 }
439 #[cfg(not(feature = "dfa-onepass"))]
440 {
441 // Impossible to reach because this engine is never constructed
442 // if the requisite features aren't enabled.
443 unreachable!()
444 }
445 }
446
447 pub(crate) fn memory_usage(&self) -> usize {
448 #[cfg(feature = "dfa-onepass")]
449 {
450 self.0.memory_usage()
451 }
452 #[cfg(not(feature = "dfa-onepass"))]
453 {
454 // Impossible to reach because this engine is never constructed
455 // if the requisite features aren't enabled.
456 unreachable!()
457 }
458 }
459
460 #[cfg_attr(feature = "perf-inline", inline(always))]
461 fn get_nfa(&self) -> &NFA {
462 #[cfg(feature = "dfa-onepass")]
463 {
464 self.0.get_nfa()
465 }
466 #[cfg(not(feature = "dfa-onepass"))]
467 {
468 // Impossible to reach because this engine is never constructed
469 // if the requisite features aren't enabled.
470 unreachable!()
471 }
472 }
473}
474
475#[derive(Clone, Debug)]
476pub(crate) struct OnePassCache(
477 #[cfg(feature = "dfa-onepass")] Option<onepass::Cache>,
478 #[cfg(not(feature = "dfa-onepass"))] (),
479);
480
481impl OnePassCache {
482 pub(crate) fn none() -> OnePassCache {
483 #[cfg(feature = "dfa-onepass")]
484 {
485 OnePassCache(None)
486 }
487 #[cfg(not(feature = "dfa-onepass"))]
488 {
489 OnePassCache(())
490 }
491 }
492
493 pub(crate) fn new(builder: &OnePass) -> OnePassCache {
494 #[cfg(feature = "dfa-onepass")]
495 {
496 OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache()))
497 }
498 #[cfg(not(feature = "dfa-onepass"))]
499 {
500 OnePassCache(())
501 }
502 }
503
504 pub(crate) fn reset(&mut self, builder: &OnePass) {
505 #[cfg(feature = "dfa-onepass")]
506 if let Some(ref e) = builder.0 {
507 self.0.as_mut().unwrap().reset(&e.0);
508 }
509 }
510
511 pub(crate) fn memory_usage(&self) -> usize {
512 #[cfg(feature = "dfa-onepass")]
513 {
514 self.0.as_ref().map_or(0, |c| c.memory_usage())
515 }
516 #[cfg(not(feature = "dfa-onepass"))]
517 {
518 0
519 }
520 }
521}
522
523#[derive(Debug)]
524pub(crate) struct Hybrid(Option<HybridEngine>);
525
526impl Hybrid {
527 pub(crate) fn none() -> Hybrid {
528 Hybrid(None)
529 }
530
531 pub(crate) fn new(
532 info: &RegexInfo,
533 pre: Option<Prefilter>,
534 nfa: &NFA,
535 nfarev: &NFA,
536 ) -> Hybrid {
537 Hybrid(HybridEngine::new(info, pre, nfa, nfarev))
538 }
539
540 pub(crate) fn create_cache(&self) -> HybridCache {
541 HybridCache::new(self)
542 }
543
544 #[cfg_attr(feature = "perf-inline", inline(always))]
545 pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> {
546 let engine = self.0.as_ref()?;
547 Some(engine)
548 }
549
550 pub(crate) fn is_some(&self) -> bool {
551 self.0.is_some()
552 }
553}
554
555#[derive(Debug)]
556pub(crate) struct HybridEngine(
557 #[cfg(feature = "hybrid")] hybrid::regex::Regex,
558 #[cfg(not(feature = "hybrid"))] (),
559);
560
561impl HybridEngine {
562 pub(crate) fn new(
563 info: &RegexInfo,
564 pre: Option<Prefilter>,
565 nfa: &NFA,
566 nfarev: &NFA,
567 ) -> Option<HybridEngine> {
568 #[cfg(feature = "hybrid")]
569 {
570 if !info.config().get_hybrid() {
571 return None;
572 }
573 let dfa_config = hybrid::dfa::Config::new()
574 .match_kind(info.config().get_match_kind())
575 .prefilter(pre.clone())
576 // Enabling this is necessary for ensuring we can service any
577 // kind of 'Input' search without error. For the lazy DFA,
578 // this is not particularly costly, since the start states are
579 // generated lazily.
580 .starts_for_each_pattern(true)
581 .byte_classes(info.config().get_byte_classes())
582 .unicode_word_boundary(true)
583 .specialize_start_states(pre.is_some())
584 .cache_capacity(info.config().get_hybrid_cache_capacity())
585 // This makes it possible for building a lazy DFA to
586 // fail even though the NFA has already been built. Namely,
587 // if the cache capacity is too small to fit some minimum
588 // number of states (which is small, like 4 or 5), then the
589 // DFA will refuse to build.
590 //
591 // We shouldn't enable this to make building always work, since
592 // this could cause the allocation of a cache bigger than the
593 // provided capacity amount.
594 //
595 // This is effectively the only reason why building a lazy DFA
596 // could fail. If it does, then we simply suppress the error
597 // and return None.
598 .skip_cache_capacity_check(false)
599 // This and enabling heuristic Unicode word boundary support
600 // above make it so the lazy DFA can quit at match time.
601 .minimum_cache_clear_count(Some(3))
602 .minimum_bytes_per_state(Some(10));
603 let result = hybrid::dfa::Builder::new()
604 .configure(dfa_config.clone())
605 .build_from_nfa(nfa.clone());
606 let fwd = match result {
607 Ok(fwd) => fwd,
608 Err(_err) => {
609 debug!("forward lazy DFA failed to build: {}", _err);
610 return None;
611 }
612 };
613 let result = hybrid::dfa::Builder::new()
614 .configure(
615 dfa_config
616 .clone()
617 .match_kind(MatchKind::All)
618 .prefilter(None)
619 .specialize_start_states(false),
620 )
621 .build_from_nfa(nfarev.clone());
622 let rev = match result {
623 Ok(rev) => rev,
624 Err(_err) => {
625 debug!("reverse lazy DFA failed to build: {}", _err);
626 return None;
627 }
628 };
629 let engine =
630 hybrid::regex::Builder::new().build_from_dfas(fwd, rev);
631 debug!("lazy DFA built");
632 Some(HybridEngine(engine))
633 }
634 #[cfg(not(feature = "hybrid"))]
635 {
636 None
637 }
638 }
639
640 #[cfg_attr(feature = "perf-inline", inline(always))]
641 pub(crate) fn try_search(
642 &self,
643 cache: &mut HybridCache,
644 input: &Input<'_>,
645 ) -> Result<Option<Match>, RetryFailError> {
646 #[cfg(feature = "hybrid")]
647 {
648 let cache = cache.0.as_mut().unwrap();
649 self.0.try_search(cache, input).map_err(|e| e.into())
650 }
651 #[cfg(not(feature = "hybrid"))]
652 {
653 // Impossible to reach because this engine is never constructed
654 // if the requisite features aren't enabled.
655 unreachable!()
656 }
657 }
658
659 #[cfg_attr(feature = "perf-inline", inline(always))]
660 pub(crate) fn try_search_half_fwd(
661 &self,
662 cache: &mut HybridCache,
663 input: &Input<'_>,
664 ) -> Result<Option<HalfMatch>, RetryFailError> {
665 #[cfg(feature = "hybrid")]
666 {
667 let fwd = self.0.forward();
668 let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0;
669 fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into())
670 }
671 #[cfg(not(feature = "hybrid"))]
672 {
673 // Impossible to reach because this engine is never constructed
674 // if the requisite features aren't enabled.
675 unreachable!()
676 }
677 }
678
679 #[cfg_attr(feature = "perf-inline", inline(always))]
680 pub(crate) fn try_search_half_fwd_stopat(
681 &self,
682 cache: &mut HybridCache,
683 input: &Input<'_>,
684 ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
685 #[cfg(feature = "hybrid")]
686 {
687 let dfa = self.0.forward();
688 let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0;
689 crate::meta::stopat::hybrid_try_search_half_fwd(
690 dfa, &mut cache, input,
691 )
692 }
693 #[cfg(not(feature = "hybrid"))]
694 {
695 // Impossible to reach because this engine is never constructed
696 // if the requisite features aren't enabled.
697 unreachable!()
698 }
699 }
700
701 #[cfg_attr(feature = "perf-inline", inline(always))]
702 pub(crate) fn try_search_half_rev(
703 &self,
704 cache: &mut HybridCache,
705 input: &Input<'_>,
706 ) -> Result<Option<HalfMatch>, RetryFailError> {
707 #[cfg(feature = "hybrid")]
708 {
709 let rev = self.0.reverse();
710 let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1;
711 rev.try_search_rev(&mut revcache, input).map_err(|e| e.into())
712 }
713 #[cfg(not(feature = "hybrid"))]
714 {
715 // Impossible to reach because this engine is never constructed
716 // if the requisite features aren't enabled.
717 unreachable!()
718 }
719 }
720
721 #[cfg_attr(feature = "perf-inline", inline(always))]
722 pub(crate) fn try_search_half_rev_limited(
723 &self,
724 cache: &mut HybridCache,
725 input: &Input<'_>,
726 min_start: usize,
727 ) -> Result<Option<HalfMatch>, RetryError> {
728 #[cfg(feature = "hybrid")]
729 {
730 let dfa = self.0.reverse();
731 let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1;
732 crate::meta::limited::hybrid_try_search_half_rev(
733 dfa, &mut cache, input, min_start,
734 )
735 }
736 #[cfg(not(feature = "hybrid"))]
737 {
738 // Impossible to reach because this engine is never constructed
739 // if the requisite features aren't enabled.
740 unreachable!()
741 }
742 }
743
744 #[inline]
745 pub(crate) fn try_which_overlapping_matches(
746 &self,
747 cache: &mut HybridCache,
748 input: &Input<'_>,
749 patset: &mut PatternSet,
750 ) -> Result<(), RetryFailError> {
751 #[cfg(feature = "hybrid")]
752 {
753 let fwd = self.0.forward();
754 let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0;
755 fwd.try_which_overlapping_matches(&mut fwdcache, input, patset)
756 .map_err(|e| e.into())
757 }
758 #[cfg(not(feature = "hybrid"))]
759 {
760 // Impossible to reach because this engine is never constructed
761 // if the requisite features aren't enabled.
762 unreachable!()
763 }
764 }
765}
766
767#[derive(Clone, Debug)]
768pub(crate) struct HybridCache(
769 #[cfg(feature = "hybrid")] Option<hybrid::regex::Cache>,
770 #[cfg(not(feature = "hybrid"))] (),
771);
772
773impl HybridCache {
774 pub(crate) fn none() -> HybridCache {
775 #[cfg(feature = "hybrid")]
776 {
777 HybridCache(None)
778 }
779 #[cfg(not(feature = "hybrid"))]
780 {
781 HybridCache(())
782 }
783 }
784
785 pub(crate) fn new(builder: &Hybrid) -> HybridCache {
786 #[cfg(feature = "hybrid")]
787 {
788 HybridCache(builder.0.as_ref().map(|e| e.0.create_cache()))
789 }
790 #[cfg(not(feature = "hybrid"))]
791 {
792 HybridCache(())
793 }
794 }
795
796 pub(crate) fn reset(&mut self, builder: &Hybrid) {
797 #[cfg(feature = "hybrid")]
798 if let Some(ref e) = builder.0 {
799 self.0.as_mut().unwrap().reset(&e.0);
800 }
801 }
802
803 pub(crate) fn memory_usage(&self) -> usize {
804 #[cfg(feature = "hybrid")]
805 {
806 self.0.as_ref().map_or(0, |c| c.memory_usage())
807 }
808 #[cfg(not(feature = "hybrid"))]
809 {
810 0
811 }
812 }
813}
814
815#[derive(Debug)]
816pub(crate) struct DFA(Option<DFAEngine>);
817
818impl DFA {
819 pub(crate) fn none() -> DFA {
820 DFA(None)
821 }
822
823 pub(crate) fn new(
824 info: &RegexInfo,
825 pre: Option<Prefilter>,
826 nfa: &NFA,
827 nfarev: &NFA,
828 ) -> DFA {
829 DFA(DFAEngine::new(info, pre, nfa, nfarev))
830 }
831
832 #[cfg_attr(feature = "perf-inline", inline(always))]
833 pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> {
834 let engine = self.0.as_ref()?;
835 Some(engine)
836 }
837
838 pub(crate) fn is_some(&self) -> bool {
839 self.0.is_some()
840 }
841
842 pub(crate) fn memory_usage(&self) -> usize {
843 self.0.as_ref().map_or(0, |e| e.memory_usage())
844 }
845}
846
847#[derive(Debug)]
848pub(crate) struct DFAEngine(
849 #[cfg(feature = "dfa-build")] dfa::regex::Regex,
850 #[cfg(not(feature = "dfa-build"))] (),
851);
852
853impl DFAEngine {
854 pub(crate) fn new(
855 info: &RegexInfo,
856 pre: Option<Prefilter>,
857 nfa: &NFA,
858 nfarev: &NFA,
859 ) -> Option<DFAEngine> {
860 #[cfg(feature = "dfa-build")]
861 {
862 if !info.config().get_dfa() {
863 return None;
864 }
865 // If our NFA is anything but small, don't even bother with a DFA.
866 if let Some(state_limit) = info.config().get_dfa_state_limit() {
867 if nfa.states().len() > state_limit {
868 debug!(
869 "skipping full DFA because NFA has {} states, \
870 which exceeds the heuristic limit of {}",
871 nfa.states().len(),
872 state_limit,
873 );
874 return None;
875 }
876 }
877 // We cut the size limit in four because the total heap used by
878 // DFA construction is determinization aux memory and the DFA
879 // itself, and those things are configured independently in the
880 // lower level DFA builder API. And then split that in two because
881 // of forward and reverse DFAs.
882 let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4);
883 let dfa_config = dfa::dense::Config::new()
884 .match_kind(info.config().get_match_kind())
885 .prefilter(pre.clone())
886 // Enabling this is necessary for ensuring we can service any
887 // kind of 'Input' search without error. For the full DFA, this
888 // can be quite costly. But since we have such a small bound
889 // on the size of the DFA, in practice, any multl-regexes are
890 // probably going to blow the limit anyway.
891 .starts_for_each_pattern(true)
892 .byte_classes(info.config().get_byte_classes())
893 .unicode_word_boundary(true)
894 .specialize_start_states(pre.is_some())
895 .determinize_size_limit(size_limit)
896 .dfa_size_limit(size_limit);
897 let result = dfa::dense::Builder::new()
898 .configure(dfa_config.clone())
899 .build_from_nfa(&nfa);
900 let fwd = match result {
901 Ok(fwd) => fwd,
902 Err(_err) => {
903 debug!("forward full DFA failed to build: {}", _err);
904 return None;
905 }
906 };
907 let result = dfa::dense::Builder::new()
908 .configure(
909 dfa_config
910 .clone()
911 // We never need unanchored reverse searches, so
912 // there's no point in building it into the DFA, which
913 // WILL take more space. (This isn't done for the lazy
914 // DFA because the DFA is, well, lazy. It doesn't pay
915 // the cost for supporting unanchored searches unless
916 // you actually do an unanchored search, which we
917 // don't.)
918 .start_kind(dfa::StartKind::Anchored)
919 .match_kind(MatchKind::All)
920 .prefilter(None)
921 .specialize_start_states(false),
922 )
923 .build_from_nfa(&nfarev);
924 let rev = match result {
925 Ok(rev) => rev,
926 Err(_err) => {
927 debug!("reverse full DFA failed to build: {}", _err);
928 return None;
929 }
930 };
931 let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev);
932 debug!(
933 "fully compiled forward and reverse DFAs built, {} bytes",
934 engine.forward().memory_usage()
935 + engine.reverse().memory_usage(),
936 );
937 Some(DFAEngine(engine))
938 }
939 #[cfg(not(feature = "dfa-build"))]
940 {
941 None
942 }
943 }
944
945 #[cfg_attr(feature = "perf-inline", inline(always))]
946 pub(crate) fn try_search(
947 &self,
948 input: &Input<'_>,
949 ) -> Result<Option<Match>, RetryFailError> {
950 #[cfg(feature = "dfa-build")]
951 {
952 self.0.try_search(input).map_err(|e| e.into())
953 }
954 #[cfg(not(feature = "dfa-build"))]
955 {
956 // Impossible to reach because this engine is never constructed
957 // if the requisite features aren't enabled.
958 unreachable!()
959 }
960 }
961
962 #[cfg_attr(feature = "perf-inline", inline(always))]
963 pub(crate) fn try_search_half_fwd(
964 &self,
965 input: &Input<'_>,
966 ) -> Result<Option<HalfMatch>, RetryFailError> {
967 #[cfg(feature = "dfa-build")]
968 {
969 use crate::dfa::Automaton;
970 self.0.forward().try_search_fwd(input).map_err(|e| e.into())
971 }
972 #[cfg(not(feature = "dfa-build"))]
973 {
974 // Impossible to reach because this engine is never constructed
975 // if the requisite features aren't enabled.
976 unreachable!()
977 }
978 }
979
980 #[cfg_attr(feature = "perf-inline", inline(always))]
981 pub(crate) fn try_search_half_fwd_stopat(
982 &self,
983 input: &Input<'_>,
984 ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
985 #[cfg(feature = "dfa-build")]
986 {
987 let dfa = self.0.forward();
988 crate::meta::stopat::dfa_try_search_half_fwd(dfa, input)
989 }
990 #[cfg(not(feature = "dfa-build"))]
991 {
992 // Impossible to reach because this engine is never constructed
993 // if the requisite features aren't enabled.
994 unreachable!()
995 }
996 }
997
998 #[cfg_attr(feature = "perf-inline", inline(always))]
999 pub(crate) fn try_search_half_rev(
1000 &self,
1001 input: &Input<'_>,
1002 ) -> Result<Option<HalfMatch>, RetryFailError> {
1003 #[cfg(feature = "dfa-build")]
1004 {
1005 use crate::dfa::Automaton;
1006 self.0.reverse().try_search_rev(&input).map_err(|e| e.into())
1007 }
1008 #[cfg(not(feature = "dfa-build"))]
1009 {
1010 // Impossible to reach because this engine is never constructed
1011 // if the requisite features aren't enabled.
1012 unreachable!()
1013 }
1014 }
1015
1016 #[cfg_attr(feature = "perf-inline", inline(always))]
1017 pub(crate) fn try_search_half_rev_limited(
1018 &self,
1019 input: &Input<'_>,
1020 min_start: usize,
1021 ) -> Result<Option<HalfMatch>, RetryError> {
1022 #[cfg(feature = "dfa-build")]
1023 {
1024 let dfa = self.0.reverse();
1025 crate::meta::limited::dfa_try_search_half_rev(
1026 dfa, input, min_start,
1027 )
1028 }
1029 #[cfg(not(feature = "dfa-build"))]
1030 {
1031 // Impossible to reach because this engine is never constructed
1032 // if the requisite features aren't enabled.
1033 unreachable!()
1034 }
1035 }
1036
1037 #[inline]
1038 pub(crate) fn try_which_overlapping_matches(
1039 &self,
1040 input: &Input<'_>,
1041 patset: &mut PatternSet,
1042 ) -> Result<(), RetryFailError> {
1043 #[cfg(feature = "dfa-build")]
1044 {
1045 use crate::dfa::Automaton;
1046 self.0
1047 .forward()
1048 .try_which_overlapping_matches(input, patset)
1049 .map_err(|e| e.into())
1050 }
1051 #[cfg(not(feature = "dfa-build"))]
1052 {
1053 // Impossible to reach because this engine is never constructed
1054 // if the requisite features aren't enabled.
1055 unreachable!()
1056 }
1057 }
1058
1059 pub(crate) fn memory_usage(&self) -> usize {
1060 #[cfg(feature = "dfa-build")]
1061 {
1062 self.0.forward().memory_usage() + self.0.reverse().memory_usage()
1063 }
1064 #[cfg(not(feature = "dfa-build"))]
1065 {
1066 // Impossible to reach because this engine is never constructed
1067 // if the requisite features aren't enabled.
1068 unreachable!()
1069 }
1070 }
1071}
1072
1073#[derive(Debug)]
1074pub(crate) struct ReverseHybrid(Option<ReverseHybridEngine>);
1075
1076impl ReverseHybrid {
1077 pub(crate) fn none() -> ReverseHybrid {
1078 ReverseHybrid(None)
1079 }
1080
1081 pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid {
1082 ReverseHybrid(ReverseHybridEngine::new(info, nfarev))
1083 }
1084
1085 pub(crate) fn create_cache(&self) -> ReverseHybridCache {
1086 ReverseHybridCache::new(self)
1087 }
1088
1089 #[cfg_attr(feature = "perf-inline", inline(always))]
1090 pub(crate) fn get(
1091 &self,
1092 _input: &Input<'_>,
1093 ) -> Option<&ReverseHybridEngine> {
1094 let engine = self.0.as_ref()?;
1095 Some(engine)
1096 }
1097}
1098
1099#[derive(Debug)]
1100pub(crate) struct ReverseHybridEngine(
1101 #[cfg(feature = "hybrid")] hybrid::dfa::DFA,
1102 #[cfg(not(feature = "hybrid"))] (),
1103);
1104
1105impl ReverseHybridEngine {
1106 pub(crate) fn new(
1107 info: &RegexInfo,
1108 nfarev: &NFA,
1109 ) -> Option<ReverseHybridEngine> {
1110 #[cfg(feature = "hybrid")]
1111 {
1112 if !info.config().get_hybrid() {
1113 return None;
1114 }
1115 // Since we only use this for reverse searches, we can hard-code
1116 // a number of things like match semantics, prefilters, starts
1117 // for each pattern and so on.
1118 let dfa_config = hybrid::dfa::Config::new()
1119 .match_kind(MatchKind::All)
1120 .prefilter(None)
1121 .starts_for_each_pattern(false)
1122 .byte_classes(info.config().get_byte_classes())
1123 .unicode_word_boundary(true)
1124 .specialize_start_states(false)
1125 .cache_capacity(info.config().get_hybrid_cache_capacity())
1126 .skip_cache_capacity_check(false)
1127 .minimum_cache_clear_count(Some(3))
1128 .minimum_bytes_per_state(Some(10));
1129 let result = hybrid::dfa::Builder::new()
1130 .configure(dfa_config)
1131 .build_from_nfa(nfarev.clone());
1132 let rev = match result {
1133 Ok(rev) => rev,
1134 Err(_err) => {
1135 debug!("lazy reverse DFA failed to build: {}", _err);
1136 return None;
1137 }
1138 };
1139 debug!("lazy reverse DFA built");
1140 Some(ReverseHybridEngine(rev))
1141 }
1142 #[cfg(not(feature = "hybrid"))]
1143 {
1144 None
1145 }
1146 }
1147
1148 #[cfg_attr(feature = "perf-inline", inline(always))]
1149 pub(crate) fn try_search_half_rev_limited(
1150 &self,
1151 cache: &mut ReverseHybridCache,
1152 input: &Input<'_>,
1153 min_start: usize,
1154 ) -> Result<Option<HalfMatch>, RetryError> {
1155 #[cfg(feature = "hybrid")]
1156 {
1157 let dfa = &self.0;
1158 let mut cache = cache.0.as_mut().unwrap();
1159 crate::meta::limited::hybrid_try_search_half_rev(
1160 dfa, &mut cache, input, min_start,
1161 )
1162 }
1163 #[cfg(not(feature = "hybrid"))]
1164 {
1165 // Impossible to reach because this engine is never constructed
1166 // if the requisite features aren't enabled.
1167 unreachable!()
1168 }
1169 }
1170}
1171
1172#[derive(Clone, Debug)]
1173pub(crate) struct ReverseHybridCache(
1174 #[cfg(feature = "hybrid")] Option<hybrid::dfa::Cache>,
1175 #[cfg(not(feature = "hybrid"))] (),
1176);
1177
1178impl ReverseHybridCache {
1179 pub(crate) fn none() -> ReverseHybridCache {
1180 #[cfg(feature = "hybrid")]
1181 {
1182 ReverseHybridCache(None)
1183 }
1184 #[cfg(not(feature = "hybrid"))]
1185 {
1186 ReverseHybridCache(())
1187 }
1188 }
1189
1190 pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache {
1191 #[cfg(feature = "hybrid")]
1192 {
1193 ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache()))
1194 }
1195 #[cfg(not(feature = "hybrid"))]
1196 {
1197 ReverseHybridCache(())
1198 }
1199 }
1200
1201 pub(crate) fn reset(&mut self, builder: &ReverseHybrid) {
1202 #[cfg(feature = "hybrid")]
1203 if let Some(ref e) = builder.0 {
1204 self.0.as_mut().unwrap().reset(&e.0);
1205 }
1206 }
1207
1208 pub(crate) fn memory_usage(&self) -> usize {
1209 #[cfg(feature = "hybrid")]
1210 {
1211 self.0.as_ref().map_or(0, |c| c.memory_usage())
1212 }
1213 #[cfg(not(feature = "hybrid"))]
1214 {
1215 0
1216 }
1217 }
1218}
1219
1220#[derive(Debug)]
1221pub(crate) struct ReverseDFA(Option<ReverseDFAEngine>);
1222
1223impl ReverseDFA {
1224 pub(crate) fn none() -> ReverseDFA {
1225 ReverseDFA(None)
1226 }
1227
1228 pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA {
1229 ReverseDFA(ReverseDFAEngine::new(info, nfarev))
1230 }
1231
1232 #[cfg_attr(feature = "perf-inline", inline(always))]
1233 pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> {
1234 let engine = self.0.as_ref()?;
1235 Some(engine)
1236 }
1237
1238 pub(crate) fn is_some(&self) -> bool {
1239 self.0.is_some()
1240 }
1241
1242 pub(crate) fn memory_usage(&self) -> usize {
1243 self.0.as_ref().map_or(0, |e| e.memory_usage())
1244 }
1245}
1246
1247#[derive(Debug)]
1248pub(crate) struct ReverseDFAEngine(
1249 #[cfg(feature = "dfa-build")] dfa::dense::DFA<Vec<u32>>,
1250 #[cfg(not(feature = "dfa-build"))] (),
1251);
1252
1253impl ReverseDFAEngine {
1254 pub(crate) fn new(
1255 info: &RegexInfo,
1256 nfarev: &NFA,
1257 ) -> Option<ReverseDFAEngine> {
1258 #[cfg(feature = "dfa-build")]
1259 {
1260 if !info.config().get_dfa() {
1261 return None;
1262 }
1263 // If our NFA is anything but small, don't even bother with a DFA.
1264 if let Some(state_limit) = info.config().get_dfa_state_limit() {
1265 if nfarev.states().len() > state_limit {
1266 debug!(
1267 "skipping full reverse DFA because NFA has {} states, \
1268 which exceeds the heuristic limit of {}",
1269 nfarev.states().len(),
1270 state_limit,
1271 );
1272 return None;
1273 }
1274 }
1275 // We cut the size limit in two because the total heap used by DFA
1276 // construction is determinization aux memory and the DFA itself,
1277 // and those things are configured independently in the lower level
1278 // DFA builder API.
1279 let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2);
1280 // Since we only use this for reverse searches, we can hard-code
1281 // a number of things like match semantics, prefilters, starts
1282 // for each pattern and so on. We also disable acceleration since
1283 // it's incompatible with limited searches (which is the only
1284 // operation we support for this kind of engine at the moment).
1285 let dfa_config = dfa::dense::Config::new()
1286 .match_kind(MatchKind::All)
1287 .prefilter(None)
1288 .accelerate(false)
1289 .start_kind(dfa::StartKind::Anchored)
1290 .starts_for_each_pattern(false)
1291 .byte_classes(info.config().get_byte_classes())
1292 .unicode_word_boundary(true)
1293 .specialize_start_states(false)
1294 .determinize_size_limit(size_limit)
1295 .dfa_size_limit(size_limit);
1296 let result = dfa::dense::Builder::new()
1297 .configure(dfa_config)
1298 .build_from_nfa(&nfarev);
1299 let rev = match result {
1300 Ok(rev) => rev,
1301 Err(_err) => {
1302 debug!("full reverse DFA failed to build: {}", _err);
1303 return None;
1304 }
1305 };
1306 debug!(
1307 "fully compiled reverse DFA built, {} bytes",
1308 rev.memory_usage()
1309 );
1310 Some(ReverseDFAEngine(rev))
1311 }
1312 #[cfg(not(feature = "dfa-build"))]
1313 {
1314 None
1315 }
1316 }
1317
1318 #[cfg_attr(feature = "perf-inline", inline(always))]
1319 pub(crate) fn try_search_half_rev_limited(
1320 &self,
1321 input: &Input<'_>,
1322 min_start: usize,
1323 ) -> Result<Option<HalfMatch>, RetryError> {
1324 #[cfg(feature = "dfa-build")]
1325 {
1326 let dfa = &self.0;
1327 crate::meta::limited::dfa_try_search_half_rev(
1328 dfa, input, min_start,
1329 )
1330 }
1331 #[cfg(not(feature = "dfa-build"))]
1332 {
1333 // Impossible to reach because this engine is never constructed
1334 // if the requisite features aren't enabled.
1335 unreachable!()
1336 }
1337 }
1338
1339 pub(crate) fn memory_usage(&self) -> usize {
1340 #[cfg(feature = "dfa-build")]
1341 {
1342 self.0.memory_usage()
1343 }
1344 #[cfg(not(feature = "dfa-build"))]
1345 {
1346 // Impossible to reach because this engine is never constructed
1347 // if the requisite features aren't enabled.
1348 unreachable!()
1349 }
1350 }
1351}
1352