1 | /*! |
2 | This module contains a boat load of wrappers around each of our internal regex |
3 | engines. They encapsulate a few things: |
4 | |
5 | 1. The wrappers manage the conditional existence of the regex engine. Namely, |
6 | the PikeVM is the only required regex engine. The rest are optional. These |
7 | wrappers present a uniform API regardless of which engines are available. And |
8 | availability might be determined by compile time features or by dynamic |
9 | configuration via `meta::Config`. Encapsulating the conditional compilation |
10 | features is in particular a huge simplification for the higher level code that |
11 | composes these engines. |
12 | 2. The wrappers manage construction of each engine, including skipping it if |
13 | the engine is unavailable or configured to not be used. |
14 | 3. The wrappers manage whether an engine *can* be used for a particular |
15 | search configuration. For example, `BoundedBacktracker::get` only returns a |
16 | backtracking engine when the haystack is bigger than the maximum supported |
17 | length. The wrappers also sometimes take a position on when an engine *ought* |
18 | to be used, but only in cases where the logic is extremely local to the engine |
19 | itself. Otherwise, things like "choose between the backtracker and the one-pass |
20 | DFA" are managed by the higher level meta strategy code. |
21 | |
22 | There are also corresponding wrappers for the various `Cache` types for each |
23 | regex engine that needs them. If an engine is unavailable or not used, then a |
24 | cache for it will *not* actually be allocated. |
25 | */ |
26 | |
27 | use alloc::vec::Vec; |
28 | |
29 | use crate::{ |
30 | meta::{ |
31 | error::{BuildError, RetryError, RetryFailError}, |
32 | regex::RegexInfo, |
33 | }, |
34 | nfa::thompson::{pikevm, NFA}, |
35 | util::{prefilter::Prefilter, primitives::NonMaxUsize}, |
36 | HalfMatch, Input, Match, MatchKind, PatternID, PatternSet, |
37 | }; |
38 | |
39 | #[cfg (feature = "dfa-build" )] |
40 | use crate::dfa; |
41 | #[cfg (feature = "dfa-onepass" )] |
42 | use crate::dfa::onepass; |
43 | #[cfg (feature = "hybrid" )] |
44 | use crate::hybrid; |
45 | #[cfg (feature = "nfa-backtrack" )] |
46 | use crate::nfa::thompson::backtrack; |
47 | |
48 | #[derive(Debug)] |
49 | pub(crate) struct PikeVM(PikeVMEngine); |
50 | |
51 | impl PikeVM { |
52 | pub(crate) fn new( |
53 | info: &RegexInfo, |
54 | pre: Option<Prefilter>, |
55 | nfa: &NFA, |
56 | ) -> Result<PikeVM, BuildError> { |
57 | PikeVMEngine::new(info, pre, nfa).map(PikeVM) |
58 | } |
59 | |
60 | pub(crate) fn create_cache(&self) -> PikeVMCache { |
61 | PikeVMCache::new(self) |
62 | } |
63 | |
64 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
65 | pub(crate) fn get(&self) -> &PikeVMEngine { |
66 | &self.0 |
67 | } |
68 | } |
69 | |
70 | #[derive(Debug)] |
71 | pub(crate) struct PikeVMEngine(pikevm::PikeVM); |
72 | |
73 | impl PikeVMEngine { |
74 | pub(crate) fn new( |
75 | info: &RegexInfo, |
76 | pre: Option<Prefilter>, |
77 | nfa: &NFA, |
78 | ) -> Result<PikeVMEngine, BuildError> { |
79 | let pikevm_config = pikevm::Config::new() |
80 | .match_kind(info.config().get_match_kind()) |
81 | .prefilter(pre); |
82 | let engine = pikevm::Builder::new() |
83 | .configure(pikevm_config) |
84 | .build_from_nfa(nfa.clone()) |
85 | .map_err(BuildError::nfa)?; |
86 | debug!("PikeVM built" ); |
87 | Ok(PikeVMEngine(engine)) |
88 | } |
89 | |
90 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
91 | pub(crate) fn is_match( |
92 | &self, |
93 | cache: &mut PikeVMCache, |
94 | input: &Input<'_>, |
95 | ) -> bool { |
96 | self.0.is_match(cache.0.as_mut().unwrap(), input.clone()) |
97 | } |
98 | |
99 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
100 | pub(crate) fn search_slots( |
101 | &self, |
102 | cache: &mut PikeVMCache, |
103 | input: &Input<'_>, |
104 | slots: &mut [Option<NonMaxUsize>], |
105 | ) -> Option<PatternID> { |
106 | self.0.search_slots(cache.0.as_mut().unwrap(), input, slots) |
107 | } |
108 | |
109 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
110 | pub(crate) fn which_overlapping_matches( |
111 | &self, |
112 | cache: &mut PikeVMCache, |
113 | input: &Input<'_>, |
114 | patset: &mut PatternSet, |
115 | ) { |
116 | self.0.which_overlapping_matches( |
117 | cache.0.as_mut().unwrap(), |
118 | input, |
119 | patset, |
120 | ) |
121 | } |
122 | } |
123 | |
124 | #[derive(Clone, Debug)] |
125 | pub(crate) struct PikeVMCache(Option<pikevm::Cache>); |
126 | |
127 | impl PikeVMCache { |
128 | pub(crate) fn none() -> PikeVMCache { |
129 | PikeVMCache(None) |
130 | } |
131 | |
132 | pub(crate) fn new(builder: &PikeVM) -> PikeVMCache { |
133 | PikeVMCache(Some(builder.get().0.create_cache())) |
134 | } |
135 | |
136 | pub(crate) fn reset(&mut self, builder: &PikeVM) { |
137 | self.0.as_mut().unwrap().reset(&builder.get().0); |
138 | } |
139 | |
140 | pub(crate) fn memory_usage(&self) -> usize { |
141 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
142 | } |
143 | } |
144 | |
145 | #[derive(Debug)] |
146 | pub(crate) struct BoundedBacktracker(Option<BoundedBacktrackerEngine>); |
147 | |
148 | impl BoundedBacktracker { |
149 | pub(crate) fn new( |
150 | info: &RegexInfo, |
151 | pre: Option<Prefilter>, |
152 | nfa: &NFA, |
153 | ) -> Result<BoundedBacktracker, BuildError> { |
154 | BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker) |
155 | } |
156 | |
157 | pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache { |
158 | BoundedBacktrackerCache::new(self) |
159 | } |
160 | |
161 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
162 | pub(crate) fn get( |
163 | &self, |
164 | input: &Input<'_>, |
165 | ) -> Option<&BoundedBacktrackerEngine> { |
166 | let engine = self.0.as_ref()?; |
167 | // It is difficult to make the backtracker give up early if it is |
168 | // guaranteed to eventually wind up in a match state. This is because |
169 | // of the greedy nature of a backtracker: it just blindly mushes |
170 | // forward. Every other regex engine is able to give up more quickly, |
171 | // so even if the backtracker might be able to zip through faster than |
172 | // (say) the PikeVM, we prefer the theoretical benefit that some other |
173 | // engine might be able to scan much less of the haystack than the |
174 | // backtracker. |
175 | // |
176 | // Now, if the haystack is really short already, then we allow the |
177 | // backtracker to run. (This hasn't been litigated quantitatively with |
178 | // benchmarks. Just a hunch.) |
179 | if input.get_earliest() && input.haystack().len() > 128 { |
180 | return None; |
181 | } |
182 | // If the backtracker is just going to return an error because the |
183 | // haystack is too long, then obviously do not use it. |
184 | if input.get_span().len() > engine.max_haystack_len() { |
185 | return None; |
186 | } |
187 | Some(engine) |
188 | } |
189 | } |
190 | |
191 | #[derive(Debug)] |
192 | pub(crate) struct BoundedBacktrackerEngine( |
193 | #[cfg (feature = "nfa-backtrack" )] backtrack::BoundedBacktracker, |
194 | #[cfg (not(feature = "nfa-backtrack" ))] (), |
195 | ); |
196 | |
197 | impl BoundedBacktrackerEngine { |
198 | pub(crate) fn new( |
199 | info: &RegexInfo, |
200 | pre: Option<Prefilter>, |
201 | nfa: &NFA, |
202 | ) -> Result<Option<BoundedBacktrackerEngine>, BuildError> { |
203 | #[cfg (feature = "nfa-backtrack" )] |
204 | { |
205 | if !info.config().get_backtrack() |
206 | || info.config().get_match_kind() != MatchKind::LeftmostFirst |
207 | { |
208 | return Ok(None); |
209 | } |
210 | let backtrack_config = backtrack::Config::new().prefilter(pre); |
211 | let engine = backtrack::Builder::new() |
212 | .configure(backtrack_config) |
213 | .build_from_nfa(nfa.clone()) |
214 | .map_err(BuildError::nfa)?; |
215 | debug!( |
216 | "BoundedBacktracker built (max haystack length: {:?})" , |
217 | engine.max_haystack_len() |
218 | ); |
219 | Ok(Some(BoundedBacktrackerEngine(engine))) |
220 | } |
221 | #[cfg (not(feature = "nfa-backtrack" ))] |
222 | { |
223 | Ok(None) |
224 | } |
225 | } |
226 | |
227 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
228 | pub(crate) fn is_match( |
229 | &self, |
230 | cache: &mut BoundedBacktrackerCache, |
231 | input: &Input<'_>, |
232 | ) -> bool { |
233 | #[cfg (feature = "nfa-backtrack" )] |
234 | { |
235 | // OK because we only permit access to this engine when we know |
236 | // the haystack is short enough for the backtracker to run without |
237 | // reporting an error. |
238 | self.0 |
239 | .try_is_match(cache.0.as_mut().unwrap(), input.clone()) |
240 | .unwrap() |
241 | } |
242 | #[cfg (not(feature = "nfa-backtrack" ))] |
243 | { |
244 | // Impossible to reach because this engine is never constructed |
245 | // if the requisite features aren't enabled. |
246 | unreachable!() |
247 | } |
248 | } |
249 | |
250 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
251 | pub(crate) fn search_slots( |
252 | &self, |
253 | cache: &mut BoundedBacktrackerCache, |
254 | input: &Input<'_>, |
255 | slots: &mut [Option<NonMaxUsize>], |
256 | ) -> Option<PatternID> { |
257 | #[cfg (feature = "nfa-backtrack" )] |
258 | { |
259 | // OK because we only permit access to this engine when we know |
260 | // the haystack is short enough for the backtracker to run without |
261 | // reporting an error. |
262 | self.0 |
263 | .try_search_slots(cache.0.as_mut().unwrap(), input, slots) |
264 | .unwrap() |
265 | } |
266 | #[cfg (not(feature = "nfa-backtrack" ))] |
267 | { |
268 | // Impossible to reach because this engine is never constructed |
269 | // if the requisite features aren't enabled. |
270 | unreachable!() |
271 | } |
272 | } |
273 | |
274 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
275 | fn max_haystack_len(&self) -> usize { |
276 | #[cfg (feature = "nfa-backtrack" )] |
277 | { |
278 | self.0.max_haystack_len() |
279 | } |
280 | #[cfg (not(feature = "nfa-backtrack" ))] |
281 | { |
282 | // Impossible to reach because this engine is never constructed |
283 | // if the requisite features aren't enabled. |
284 | unreachable!() |
285 | } |
286 | } |
287 | } |
288 | |
289 | #[derive(Clone, Debug)] |
290 | pub(crate) struct BoundedBacktrackerCache( |
291 | #[cfg (feature = "nfa-backtrack" )] Option<backtrack::Cache>, |
292 | #[cfg (not(feature = "nfa-backtrack" ))] (), |
293 | ); |
294 | |
295 | impl BoundedBacktrackerCache { |
296 | pub(crate) fn none() -> BoundedBacktrackerCache { |
297 | #[cfg (feature = "nfa-backtrack" )] |
298 | { |
299 | BoundedBacktrackerCache(None) |
300 | } |
301 | #[cfg (not(feature = "nfa-backtrack" ))] |
302 | { |
303 | BoundedBacktrackerCache(()) |
304 | } |
305 | } |
306 | |
307 | pub(crate) fn new( |
308 | builder: &BoundedBacktracker, |
309 | ) -> BoundedBacktrackerCache { |
310 | #[cfg (feature = "nfa-backtrack" )] |
311 | { |
312 | BoundedBacktrackerCache( |
313 | builder.0.as_ref().map(|e| e.0.create_cache()), |
314 | ) |
315 | } |
316 | #[cfg (not(feature = "nfa-backtrack" ))] |
317 | { |
318 | BoundedBacktrackerCache(()) |
319 | } |
320 | } |
321 | |
322 | pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) { |
323 | #[cfg (feature = "nfa-backtrack" )] |
324 | if let Some(ref e) = builder.0 { |
325 | self.0.as_mut().unwrap().reset(&e.0); |
326 | } |
327 | } |
328 | |
329 | pub(crate) fn memory_usage(&self) -> usize { |
330 | #[cfg (feature = "nfa-backtrack" )] |
331 | { |
332 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
333 | } |
334 | #[cfg (not(feature = "nfa-backtrack" ))] |
335 | { |
336 | 0 |
337 | } |
338 | } |
339 | } |
340 | |
341 | #[derive(Debug)] |
342 | pub(crate) struct OnePass(Option<OnePassEngine>); |
343 | |
344 | impl OnePass { |
345 | pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass { |
346 | OnePass(OnePassEngine::new(info, nfa)) |
347 | } |
348 | |
349 | pub(crate) fn create_cache(&self) -> OnePassCache { |
350 | OnePassCache::new(self) |
351 | } |
352 | |
353 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
354 | pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> { |
355 | let engine = self.0.as_ref()?; |
356 | if !input.get_anchored().is_anchored() |
357 | && !engine.get_nfa().is_always_start_anchored() |
358 | { |
359 | return None; |
360 | } |
361 | Some(engine) |
362 | } |
363 | |
364 | pub(crate) fn memory_usage(&self) -> usize { |
365 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
366 | } |
367 | } |
368 | |
369 | #[derive(Debug)] |
370 | pub(crate) struct OnePassEngine( |
371 | #[cfg (feature = "dfa-onepass" )] onepass::DFA, |
372 | #[cfg (not(feature = "dfa-onepass" ))] (), |
373 | ); |
374 | |
375 | impl OnePassEngine { |
376 | pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> Option<OnePassEngine> { |
377 | #[cfg (feature = "dfa-onepass" )] |
378 | { |
379 | if !info.config().get_onepass() { |
380 | return None; |
381 | } |
382 | // In order to even attempt building a one-pass DFA, we require |
383 | // that we either have at least one explicit capturing group or |
384 | // there's a Unicode word boundary somewhere. If we don't have |
385 | // either of these things, then the lazy DFA will almost certainly |
386 | // be useable and be much faster. The only case where it might |
387 | // not is if the lazy DFA isn't utilizing its cache effectively, |
388 | // but in those cases, the underlying regex is almost certainly |
389 | // not one-pass or is too big to fit within the current one-pass |
390 | // implementation limits. |
391 | if info.props_union().explicit_captures_len() == 0 |
392 | && !info.props_union().look_set().contains_word_unicode() |
393 | { |
394 | debug!("not building OnePass because it isn't worth it" ); |
395 | return None; |
396 | } |
397 | let onepass_config = onepass::Config::new() |
398 | .match_kind(info.config().get_match_kind()) |
399 | // Like for the lazy DFA, we unconditionally enable this |
400 | // because it doesn't cost much and makes the API more |
401 | // flexible. |
402 | .starts_for_each_pattern(true) |
403 | .byte_classes(info.config().get_byte_classes()) |
404 | .size_limit(info.config().get_onepass_size_limit()); |
405 | let result = onepass::Builder::new() |
406 | .configure(onepass_config) |
407 | .build_from_nfa(nfa.clone()); |
408 | let engine = match result { |
409 | Ok(engine) => engine, |
410 | Err(_err) => { |
411 | debug!("OnePass failed to build: {}" , _err); |
412 | return None; |
413 | } |
414 | }; |
415 | debug!("OnePass built, {} bytes" , engine.memory_usage()); |
416 | Some(OnePassEngine(engine)) |
417 | } |
418 | #[cfg (not(feature = "dfa-onepass" ))] |
419 | { |
420 | None |
421 | } |
422 | } |
423 | |
424 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
425 | pub(crate) fn search_slots( |
426 | &self, |
427 | cache: &mut OnePassCache, |
428 | input: &Input<'_>, |
429 | slots: &mut [Option<NonMaxUsize>], |
430 | ) -> Option<PatternID> { |
431 | #[cfg (feature = "dfa-onepass" )] |
432 | { |
433 | // OK because we only permit getting a OnePassEngine when we know |
434 | // the search is anchored and thus an error cannot occur. |
435 | self.0 |
436 | .try_search_slots(cache.0.as_mut().unwrap(), input, slots) |
437 | .unwrap() |
438 | } |
439 | #[cfg (not(feature = "dfa-onepass" ))] |
440 | { |
441 | // Impossible to reach because this engine is never constructed |
442 | // if the requisite features aren't enabled. |
443 | unreachable!() |
444 | } |
445 | } |
446 | |
447 | pub(crate) fn memory_usage(&self) -> usize { |
448 | #[cfg (feature = "dfa-onepass" )] |
449 | { |
450 | self.0.memory_usage() |
451 | } |
452 | #[cfg (not(feature = "dfa-onepass" ))] |
453 | { |
454 | // Impossible to reach because this engine is never constructed |
455 | // if the requisite features aren't enabled. |
456 | unreachable!() |
457 | } |
458 | } |
459 | |
460 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
461 | fn get_nfa(&self) -> &NFA { |
462 | #[cfg (feature = "dfa-onepass" )] |
463 | { |
464 | self.0.get_nfa() |
465 | } |
466 | #[cfg (not(feature = "dfa-onepass" ))] |
467 | { |
468 | // Impossible to reach because this engine is never constructed |
469 | // if the requisite features aren't enabled. |
470 | unreachable!() |
471 | } |
472 | } |
473 | } |
474 | |
475 | #[derive(Clone, Debug)] |
476 | pub(crate) struct OnePassCache( |
477 | #[cfg (feature = "dfa-onepass" )] Option<onepass::Cache>, |
478 | #[cfg (not(feature = "dfa-onepass" ))] (), |
479 | ); |
480 | |
481 | impl OnePassCache { |
482 | pub(crate) fn none() -> OnePassCache { |
483 | #[cfg (feature = "dfa-onepass" )] |
484 | { |
485 | OnePassCache(None) |
486 | } |
487 | #[cfg (not(feature = "dfa-onepass" ))] |
488 | { |
489 | OnePassCache(()) |
490 | } |
491 | } |
492 | |
493 | pub(crate) fn new(builder: &OnePass) -> OnePassCache { |
494 | #[cfg (feature = "dfa-onepass" )] |
495 | { |
496 | OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
497 | } |
498 | #[cfg (not(feature = "dfa-onepass" ))] |
499 | { |
500 | OnePassCache(()) |
501 | } |
502 | } |
503 | |
504 | pub(crate) fn reset(&mut self, builder: &OnePass) { |
505 | #[cfg (feature = "dfa-onepass" )] |
506 | if let Some(ref e) = builder.0 { |
507 | self.0.as_mut().unwrap().reset(&e.0); |
508 | } |
509 | } |
510 | |
511 | pub(crate) fn memory_usage(&self) -> usize { |
512 | #[cfg (feature = "dfa-onepass" )] |
513 | { |
514 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
515 | } |
516 | #[cfg (not(feature = "dfa-onepass" ))] |
517 | { |
518 | 0 |
519 | } |
520 | } |
521 | } |
522 | |
523 | #[derive(Debug)] |
524 | pub(crate) struct Hybrid(Option<HybridEngine>); |
525 | |
526 | impl Hybrid { |
527 | pub(crate) fn none() -> Hybrid { |
528 | Hybrid(None) |
529 | } |
530 | |
531 | pub(crate) fn new( |
532 | info: &RegexInfo, |
533 | pre: Option<Prefilter>, |
534 | nfa: &NFA, |
535 | nfarev: &NFA, |
536 | ) -> Hybrid { |
537 | Hybrid(HybridEngine::new(info, pre, nfa, nfarev)) |
538 | } |
539 | |
540 | pub(crate) fn create_cache(&self) -> HybridCache { |
541 | HybridCache::new(self) |
542 | } |
543 | |
544 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
545 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> { |
546 | let engine = self.0.as_ref()?; |
547 | Some(engine) |
548 | } |
549 | |
550 | pub(crate) fn is_some(&self) -> bool { |
551 | self.0.is_some() |
552 | } |
553 | } |
554 | |
555 | #[derive(Debug)] |
556 | pub(crate) struct HybridEngine( |
557 | #[cfg (feature = "hybrid" )] hybrid::regex::Regex, |
558 | #[cfg (not(feature = "hybrid" ))] (), |
559 | ); |
560 | |
561 | impl HybridEngine { |
562 | pub(crate) fn new( |
563 | info: &RegexInfo, |
564 | pre: Option<Prefilter>, |
565 | nfa: &NFA, |
566 | nfarev: &NFA, |
567 | ) -> Option<HybridEngine> { |
568 | #[cfg (feature = "hybrid" )] |
569 | { |
570 | if !info.config().get_hybrid() { |
571 | return None; |
572 | } |
573 | let dfa_config = hybrid::dfa::Config::new() |
574 | .match_kind(info.config().get_match_kind()) |
575 | .prefilter(pre.clone()) |
576 | // Enabling this is necessary for ensuring we can service any |
577 | // kind of 'Input' search without error. For the lazy DFA, |
578 | // this is not particularly costly, since the start states are |
579 | // generated lazily. |
580 | .starts_for_each_pattern(true) |
581 | .byte_classes(info.config().get_byte_classes()) |
582 | .unicode_word_boundary(true) |
583 | .specialize_start_states(pre.is_some()) |
584 | .cache_capacity(info.config().get_hybrid_cache_capacity()) |
585 | // This makes it possible for building a lazy DFA to |
586 | // fail even though the NFA has already been built. Namely, |
587 | // if the cache capacity is too small to fit some minimum |
588 | // number of states (which is small, like 4 or 5), then the |
589 | // DFA will refuse to build. |
590 | // |
591 | // We shouldn't enable this to make building always work, since |
592 | // this could cause the allocation of a cache bigger than the |
593 | // provided capacity amount. |
594 | // |
595 | // This is effectively the only reason why building a lazy DFA |
596 | // could fail. If it does, then we simply suppress the error |
597 | // and return None. |
598 | .skip_cache_capacity_check(false) |
599 | // This and enabling heuristic Unicode word boundary support |
600 | // above make it so the lazy DFA can quit at match time. |
601 | .minimum_cache_clear_count(Some(3)) |
602 | .minimum_bytes_per_state(Some(10)); |
603 | let result = hybrid::dfa::Builder::new() |
604 | .configure(dfa_config.clone()) |
605 | .build_from_nfa(nfa.clone()); |
606 | let fwd = match result { |
607 | Ok(fwd) => fwd, |
608 | Err(_err) => { |
609 | debug!("forward lazy DFA failed to build: {}" , _err); |
610 | return None; |
611 | } |
612 | }; |
613 | let result = hybrid::dfa::Builder::new() |
614 | .configure( |
615 | dfa_config |
616 | .clone() |
617 | .match_kind(MatchKind::All) |
618 | .prefilter(None) |
619 | .specialize_start_states(false), |
620 | ) |
621 | .build_from_nfa(nfarev.clone()); |
622 | let rev = match result { |
623 | Ok(rev) => rev, |
624 | Err(_err) => { |
625 | debug!("reverse lazy DFA failed to build: {}" , _err); |
626 | return None; |
627 | } |
628 | }; |
629 | let engine = |
630 | hybrid::regex::Builder::new().build_from_dfas(fwd, rev); |
631 | debug!("lazy DFA built" ); |
632 | Some(HybridEngine(engine)) |
633 | } |
634 | #[cfg (not(feature = "hybrid" ))] |
635 | { |
636 | None |
637 | } |
638 | } |
639 | |
640 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
641 | pub(crate) fn try_search( |
642 | &self, |
643 | cache: &mut HybridCache, |
644 | input: &Input<'_>, |
645 | ) -> Result<Option<Match>, RetryFailError> { |
646 | #[cfg (feature = "hybrid" )] |
647 | { |
648 | let cache = cache.0.as_mut().unwrap(); |
649 | self.0.try_search(cache, input).map_err(|e| e.into()) |
650 | } |
651 | #[cfg (not(feature = "hybrid" ))] |
652 | { |
653 | // Impossible to reach because this engine is never constructed |
654 | // if the requisite features aren't enabled. |
655 | unreachable!() |
656 | } |
657 | } |
658 | |
659 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
660 | pub(crate) fn try_search_half_fwd( |
661 | &self, |
662 | cache: &mut HybridCache, |
663 | input: &Input<'_>, |
664 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
665 | #[cfg (feature = "hybrid" )] |
666 | { |
667 | let fwd = self.0.forward(); |
668 | let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; |
669 | fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into()) |
670 | } |
671 | #[cfg (not(feature = "hybrid" ))] |
672 | { |
673 | // Impossible to reach because this engine is never constructed |
674 | // if the requisite features aren't enabled. |
675 | unreachable!() |
676 | } |
677 | } |
678 | |
679 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
680 | pub(crate) fn try_search_half_fwd_stopat( |
681 | &self, |
682 | cache: &mut HybridCache, |
683 | input: &Input<'_>, |
684 | ) -> Result<Result<HalfMatch, usize>, RetryFailError> { |
685 | #[cfg (feature = "hybrid" )] |
686 | { |
687 | let dfa = self.0.forward(); |
688 | let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0; |
689 | crate::meta::stopat::hybrid_try_search_half_fwd( |
690 | dfa, &mut cache, input, |
691 | ) |
692 | } |
693 | #[cfg (not(feature = "hybrid" ))] |
694 | { |
695 | // Impossible to reach because this engine is never constructed |
696 | // if the requisite features aren't enabled. |
697 | unreachable!() |
698 | } |
699 | } |
700 | |
701 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
702 | pub(crate) fn try_search_half_rev( |
703 | &self, |
704 | cache: &mut HybridCache, |
705 | input: &Input<'_>, |
706 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
707 | #[cfg (feature = "hybrid" )] |
708 | { |
709 | let rev = self.0.reverse(); |
710 | let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1; |
711 | rev.try_search_rev(&mut revcache, input).map_err(|e| e.into()) |
712 | } |
713 | #[cfg (not(feature = "hybrid" ))] |
714 | { |
715 | // Impossible to reach because this engine is never constructed |
716 | // if the requisite features aren't enabled. |
717 | unreachable!() |
718 | } |
719 | } |
720 | |
721 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
722 | pub(crate) fn try_search_half_rev_limited( |
723 | &self, |
724 | cache: &mut HybridCache, |
725 | input: &Input<'_>, |
726 | min_start: usize, |
727 | ) -> Result<Option<HalfMatch>, RetryError> { |
728 | #[cfg (feature = "hybrid" )] |
729 | { |
730 | let dfa = self.0.reverse(); |
731 | let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1; |
732 | crate::meta::limited::hybrid_try_search_half_rev( |
733 | dfa, &mut cache, input, min_start, |
734 | ) |
735 | } |
736 | #[cfg (not(feature = "hybrid" ))] |
737 | { |
738 | // Impossible to reach because this engine is never constructed |
739 | // if the requisite features aren't enabled. |
740 | unreachable!() |
741 | } |
742 | } |
743 | |
744 | #[inline ] |
745 | pub(crate) fn try_which_overlapping_matches( |
746 | &self, |
747 | cache: &mut HybridCache, |
748 | input: &Input<'_>, |
749 | patset: &mut PatternSet, |
750 | ) -> Result<(), RetryFailError> { |
751 | #[cfg (feature = "hybrid" )] |
752 | { |
753 | let fwd = self.0.forward(); |
754 | let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; |
755 | fwd.try_which_overlapping_matches(&mut fwdcache, input, patset) |
756 | .map_err(|e| e.into()) |
757 | } |
758 | #[cfg (not(feature = "hybrid" ))] |
759 | { |
760 | // Impossible to reach because this engine is never constructed |
761 | // if the requisite features aren't enabled. |
762 | unreachable!() |
763 | } |
764 | } |
765 | } |
766 | |
767 | #[derive(Clone, Debug)] |
768 | pub(crate) struct HybridCache( |
769 | #[cfg (feature = "hybrid" )] Option<hybrid::regex::Cache>, |
770 | #[cfg (not(feature = "hybrid" ))] (), |
771 | ); |
772 | |
773 | impl HybridCache { |
774 | pub(crate) fn none() -> HybridCache { |
775 | #[cfg (feature = "hybrid" )] |
776 | { |
777 | HybridCache(None) |
778 | } |
779 | #[cfg (not(feature = "hybrid" ))] |
780 | { |
781 | HybridCache(()) |
782 | } |
783 | } |
784 | |
785 | pub(crate) fn new(builder: &Hybrid) -> HybridCache { |
786 | #[cfg (feature = "hybrid" )] |
787 | { |
788 | HybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
789 | } |
790 | #[cfg (not(feature = "hybrid" ))] |
791 | { |
792 | HybridCache(()) |
793 | } |
794 | } |
795 | |
796 | pub(crate) fn reset(&mut self, builder: &Hybrid) { |
797 | #[cfg (feature = "hybrid" )] |
798 | if let Some(ref e) = builder.0 { |
799 | self.0.as_mut().unwrap().reset(&e.0); |
800 | } |
801 | } |
802 | |
803 | pub(crate) fn memory_usage(&self) -> usize { |
804 | #[cfg (feature = "hybrid" )] |
805 | { |
806 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
807 | } |
808 | #[cfg (not(feature = "hybrid" ))] |
809 | { |
810 | 0 |
811 | } |
812 | } |
813 | } |
814 | |
815 | #[derive(Debug)] |
816 | pub(crate) struct DFA(Option<DFAEngine>); |
817 | |
818 | impl DFA { |
819 | pub(crate) fn none() -> DFA { |
820 | DFA(None) |
821 | } |
822 | |
823 | pub(crate) fn new( |
824 | info: &RegexInfo, |
825 | pre: Option<Prefilter>, |
826 | nfa: &NFA, |
827 | nfarev: &NFA, |
828 | ) -> DFA { |
829 | DFA(DFAEngine::new(info, pre, nfa, nfarev)) |
830 | } |
831 | |
832 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
833 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> { |
834 | let engine = self.0.as_ref()?; |
835 | Some(engine) |
836 | } |
837 | |
838 | pub(crate) fn is_some(&self) -> bool { |
839 | self.0.is_some() |
840 | } |
841 | |
842 | pub(crate) fn memory_usage(&self) -> usize { |
843 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
844 | } |
845 | } |
846 | |
847 | #[derive(Debug)] |
848 | pub(crate) struct DFAEngine( |
849 | #[cfg (feature = "dfa-build" )] dfa::regex::Regex, |
850 | #[cfg (not(feature = "dfa-build" ))] (), |
851 | ); |
852 | |
853 | impl DFAEngine { |
854 | pub(crate) fn new( |
855 | info: &RegexInfo, |
856 | pre: Option<Prefilter>, |
857 | nfa: &NFA, |
858 | nfarev: &NFA, |
859 | ) -> Option<DFAEngine> { |
860 | #[cfg (feature = "dfa-build" )] |
861 | { |
862 | if !info.config().get_dfa() { |
863 | return None; |
864 | } |
865 | // If our NFA is anything but small, don't even bother with a DFA. |
866 | if let Some(state_limit) = info.config().get_dfa_state_limit() { |
867 | if nfa.states().len() > state_limit { |
868 | debug!( |
869 | "skipping full DFA because NFA has {} states, \ |
870 | which exceeds the heuristic limit of {}" , |
871 | nfa.states().len(), |
872 | state_limit, |
873 | ); |
874 | return None; |
875 | } |
876 | } |
877 | // We cut the size limit in four because the total heap used by |
878 | // DFA construction is determinization aux memory and the DFA |
879 | // itself, and those things are configured independently in the |
880 | // lower level DFA builder API. And then split that in two because |
881 | // of forward and reverse DFAs. |
882 | let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4); |
883 | let dfa_config = dfa::dense::Config::new() |
884 | .match_kind(info.config().get_match_kind()) |
885 | .prefilter(pre.clone()) |
886 | // Enabling this is necessary for ensuring we can service any |
887 | // kind of 'Input' search without error. For the full DFA, this |
888 | // can be quite costly. But since we have such a small bound |
889 | // on the size of the DFA, in practice, any multl-regexes are |
890 | // probably going to blow the limit anyway. |
891 | .starts_for_each_pattern(true) |
892 | .byte_classes(info.config().get_byte_classes()) |
893 | .unicode_word_boundary(true) |
894 | .specialize_start_states(pre.is_some()) |
895 | .determinize_size_limit(size_limit) |
896 | .dfa_size_limit(size_limit); |
897 | let result = dfa::dense::Builder::new() |
898 | .configure(dfa_config.clone()) |
899 | .build_from_nfa(&nfa); |
900 | let fwd = match result { |
901 | Ok(fwd) => fwd, |
902 | Err(_err) => { |
903 | debug!("forward full DFA failed to build: {}" , _err); |
904 | return None; |
905 | } |
906 | }; |
907 | let result = dfa::dense::Builder::new() |
908 | .configure( |
909 | dfa_config |
910 | .clone() |
911 | // We never need unanchored reverse searches, so |
912 | // there's no point in building it into the DFA, which |
913 | // WILL take more space. (This isn't done for the lazy |
914 | // DFA because the DFA is, well, lazy. It doesn't pay |
915 | // the cost for supporting unanchored searches unless |
916 | // you actually do an unanchored search, which we |
917 | // don't.) |
918 | .start_kind(dfa::StartKind::Anchored) |
919 | .match_kind(MatchKind::All) |
920 | .prefilter(None) |
921 | .specialize_start_states(false), |
922 | ) |
923 | .build_from_nfa(&nfarev); |
924 | let rev = match result { |
925 | Ok(rev) => rev, |
926 | Err(_err) => { |
927 | debug!("reverse full DFA failed to build: {}" , _err); |
928 | return None; |
929 | } |
930 | }; |
931 | let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev); |
932 | debug!( |
933 | "fully compiled forward and reverse DFAs built, {} bytes" , |
934 | engine.forward().memory_usage() |
935 | + engine.reverse().memory_usage(), |
936 | ); |
937 | Some(DFAEngine(engine)) |
938 | } |
939 | #[cfg (not(feature = "dfa-build" ))] |
940 | { |
941 | None |
942 | } |
943 | } |
944 | |
945 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
946 | pub(crate) fn try_search( |
947 | &self, |
948 | input: &Input<'_>, |
949 | ) -> Result<Option<Match>, RetryFailError> { |
950 | #[cfg (feature = "dfa-build" )] |
951 | { |
952 | self.0.try_search(input).map_err(|e| e.into()) |
953 | } |
954 | #[cfg (not(feature = "dfa-build" ))] |
955 | { |
956 | // Impossible to reach because this engine is never constructed |
957 | // if the requisite features aren't enabled. |
958 | unreachable!() |
959 | } |
960 | } |
961 | |
962 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
963 | pub(crate) fn try_search_half_fwd( |
964 | &self, |
965 | input: &Input<'_>, |
966 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
967 | #[cfg (feature = "dfa-build" )] |
968 | { |
969 | use crate::dfa::Automaton; |
970 | self.0.forward().try_search_fwd(input).map_err(|e| e.into()) |
971 | } |
972 | #[cfg (not(feature = "dfa-build" ))] |
973 | { |
974 | // Impossible to reach because this engine is never constructed |
975 | // if the requisite features aren't enabled. |
976 | unreachable!() |
977 | } |
978 | } |
979 | |
980 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
981 | pub(crate) fn try_search_half_fwd_stopat( |
982 | &self, |
983 | input: &Input<'_>, |
984 | ) -> Result<Result<HalfMatch, usize>, RetryFailError> { |
985 | #[cfg (feature = "dfa-build" )] |
986 | { |
987 | let dfa = self.0.forward(); |
988 | crate::meta::stopat::dfa_try_search_half_fwd(dfa, input) |
989 | } |
990 | #[cfg (not(feature = "dfa-build" ))] |
991 | { |
992 | // Impossible to reach because this engine is never constructed |
993 | // if the requisite features aren't enabled. |
994 | unreachable!() |
995 | } |
996 | } |
997 | |
998 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
999 | pub(crate) fn try_search_half_rev( |
1000 | &self, |
1001 | input: &Input<'_>, |
1002 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
1003 | #[cfg (feature = "dfa-build" )] |
1004 | { |
1005 | use crate::dfa::Automaton; |
1006 | self.0.reverse().try_search_rev(&input).map_err(|e| e.into()) |
1007 | } |
1008 | #[cfg (not(feature = "dfa-build" ))] |
1009 | { |
1010 | // Impossible to reach because this engine is never constructed |
1011 | // if the requisite features aren't enabled. |
1012 | unreachable!() |
1013 | } |
1014 | } |
1015 | |
1016 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1017 | pub(crate) fn try_search_half_rev_limited( |
1018 | &self, |
1019 | input: &Input<'_>, |
1020 | min_start: usize, |
1021 | ) -> Result<Option<HalfMatch>, RetryError> { |
1022 | #[cfg (feature = "dfa-build" )] |
1023 | { |
1024 | let dfa = self.0.reverse(); |
1025 | crate::meta::limited::dfa_try_search_half_rev( |
1026 | dfa, input, min_start, |
1027 | ) |
1028 | } |
1029 | #[cfg (not(feature = "dfa-build" ))] |
1030 | { |
1031 | // Impossible to reach because this engine is never constructed |
1032 | // if the requisite features aren't enabled. |
1033 | unreachable!() |
1034 | } |
1035 | } |
1036 | |
1037 | #[inline ] |
1038 | pub(crate) fn try_which_overlapping_matches( |
1039 | &self, |
1040 | input: &Input<'_>, |
1041 | patset: &mut PatternSet, |
1042 | ) -> Result<(), RetryFailError> { |
1043 | #[cfg (feature = "dfa-build" )] |
1044 | { |
1045 | use crate::dfa::Automaton; |
1046 | self.0 |
1047 | .forward() |
1048 | .try_which_overlapping_matches(input, patset) |
1049 | .map_err(|e| e.into()) |
1050 | } |
1051 | #[cfg (not(feature = "dfa-build" ))] |
1052 | { |
1053 | // Impossible to reach because this engine is never constructed |
1054 | // if the requisite features aren't enabled. |
1055 | unreachable!() |
1056 | } |
1057 | } |
1058 | |
1059 | pub(crate) fn memory_usage(&self) -> usize { |
1060 | #[cfg (feature = "dfa-build" )] |
1061 | { |
1062 | self.0.forward().memory_usage() + self.0.reverse().memory_usage() |
1063 | } |
1064 | #[cfg (not(feature = "dfa-build" ))] |
1065 | { |
1066 | // Impossible to reach because this engine is never constructed |
1067 | // if the requisite features aren't enabled. |
1068 | unreachable!() |
1069 | } |
1070 | } |
1071 | } |
1072 | |
1073 | #[derive(Debug)] |
1074 | pub(crate) struct ReverseHybrid(Option<ReverseHybridEngine>); |
1075 | |
1076 | impl ReverseHybrid { |
1077 | pub(crate) fn none() -> ReverseHybrid { |
1078 | ReverseHybrid(None) |
1079 | } |
1080 | |
1081 | pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid { |
1082 | ReverseHybrid(ReverseHybridEngine::new(info, nfarev)) |
1083 | } |
1084 | |
1085 | pub(crate) fn create_cache(&self) -> ReverseHybridCache { |
1086 | ReverseHybridCache::new(self) |
1087 | } |
1088 | |
1089 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1090 | pub(crate) fn get( |
1091 | &self, |
1092 | _input: &Input<'_>, |
1093 | ) -> Option<&ReverseHybridEngine> { |
1094 | let engine = self.0.as_ref()?; |
1095 | Some(engine) |
1096 | } |
1097 | } |
1098 | |
1099 | #[derive(Debug)] |
1100 | pub(crate) struct ReverseHybridEngine( |
1101 | #[cfg (feature = "hybrid" )] hybrid::dfa::DFA, |
1102 | #[cfg (not(feature = "hybrid" ))] (), |
1103 | ); |
1104 | |
1105 | impl ReverseHybridEngine { |
1106 | pub(crate) fn new( |
1107 | info: &RegexInfo, |
1108 | nfarev: &NFA, |
1109 | ) -> Option<ReverseHybridEngine> { |
1110 | #[cfg (feature = "hybrid" )] |
1111 | { |
1112 | if !info.config().get_hybrid() { |
1113 | return None; |
1114 | } |
1115 | // Since we only use this for reverse searches, we can hard-code |
1116 | // a number of things like match semantics, prefilters, starts |
1117 | // for each pattern and so on. |
1118 | let dfa_config = hybrid::dfa::Config::new() |
1119 | .match_kind(MatchKind::All) |
1120 | .prefilter(None) |
1121 | .starts_for_each_pattern(false) |
1122 | .byte_classes(info.config().get_byte_classes()) |
1123 | .unicode_word_boundary(true) |
1124 | .specialize_start_states(false) |
1125 | .cache_capacity(info.config().get_hybrid_cache_capacity()) |
1126 | .skip_cache_capacity_check(false) |
1127 | .minimum_cache_clear_count(Some(3)) |
1128 | .minimum_bytes_per_state(Some(10)); |
1129 | let result = hybrid::dfa::Builder::new() |
1130 | .configure(dfa_config) |
1131 | .build_from_nfa(nfarev.clone()); |
1132 | let rev = match result { |
1133 | Ok(rev) => rev, |
1134 | Err(_err) => { |
1135 | debug!("lazy reverse DFA failed to build: {}" , _err); |
1136 | return None; |
1137 | } |
1138 | }; |
1139 | debug!("lazy reverse DFA built" ); |
1140 | Some(ReverseHybridEngine(rev)) |
1141 | } |
1142 | #[cfg (not(feature = "hybrid" ))] |
1143 | { |
1144 | None |
1145 | } |
1146 | } |
1147 | |
1148 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1149 | pub(crate) fn try_search_half_rev_limited( |
1150 | &self, |
1151 | cache: &mut ReverseHybridCache, |
1152 | input: &Input<'_>, |
1153 | min_start: usize, |
1154 | ) -> Result<Option<HalfMatch>, RetryError> { |
1155 | #[cfg (feature = "hybrid" )] |
1156 | { |
1157 | let dfa = &self.0; |
1158 | let mut cache = cache.0.as_mut().unwrap(); |
1159 | crate::meta::limited::hybrid_try_search_half_rev( |
1160 | dfa, &mut cache, input, min_start, |
1161 | ) |
1162 | } |
1163 | #[cfg (not(feature = "hybrid" ))] |
1164 | { |
1165 | // Impossible to reach because this engine is never constructed |
1166 | // if the requisite features aren't enabled. |
1167 | unreachable!() |
1168 | } |
1169 | } |
1170 | } |
1171 | |
1172 | #[derive(Clone, Debug)] |
1173 | pub(crate) struct ReverseHybridCache( |
1174 | #[cfg (feature = "hybrid" )] Option<hybrid::dfa::Cache>, |
1175 | #[cfg (not(feature = "hybrid" ))] (), |
1176 | ); |
1177 | |
1178 | impl ReverseHybridCache { |
1179 | pub(crate) fn none() -> ReverseHybridCache { |
1180 | #[cfg (feature = "hybrid" )] |
1181 | { |
1182 | ReverseHybridCache(None) |
1183 | } |
1184 | #[cfg (not(feature = "hybrid" ))] |
1185 | { |
1186 | ReverseHybridCache(()) |
1187 | } |
1188 | } |
1189 | |
1190 | pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache { |
1191 | #[cfg (feature = "hybrid" )] |
1192 | { |
1193 | ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
1194 | } |
1195 | #[cfg (not(feature = "hybrid" ))] |
1196 | { |
1197 | ReverseHybridCache(()) |
1198 | } |
1199 | } |
1200 | |
1201 | pub(crate) fn reset(&mut self, builder: &ReverseHybrid) { |
1202 | #[cfg (feature = "hybrid" )] |
1203 | if let Some(ref e) = builder.0 { |
1204 | self.0.as_mut().unwrap().reset(&e.0); |
1205 | } |
1206 | } |
1207 | |
1208 | pub(crate) fn memory_usage(&self) -> usize { |
1209 | #[cfg (feature = "hybrid" )] |
1210 | { |
1211 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
1212 | } |
1213 | #[cfg (not(feature = "hybrid" ))] |
1214 | { |
1215 | 0 |
1216 | } |
1217 | } |
1218 | } |
1219 | |
1220 | #[derive(Debug)] |
1221 | pub(crate) struct ReverseDFA(Option<ReverseDFAEngine>); |
1222 | |
1223 | impl ReverseDFA { |
1224 | pub(crate) fn none() -> ReverseDFA { |
1225 | ReverseDFA(None) |
1226 | } |
1227 | |
1228 | pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA { |
1229 | ReverseDFA(ReverseDFAEngine::new(info, nfarev)) |
1230 | } |
1231 | |
1232 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1233 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> { |
1234 | let engine = self.0.as_ref()?; |
1235 | Some(engine) |
1236 | } |
1237 | |
1238 | pub(crate) fn is_some(&self) -> bool { |
1239 | self.0.is_some() |
1240 | } |
1241 | |
1242 | pub(crate) fn memory_usage(&self) -> usize { |
1243 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
1244 | } |
1245 | } |
1246 | |
1247 | #[derive(Debug)] |
1248 | pub(crate) struct ReverseDFAEngine( |
1249 | #[cfg (feature = "dfa-build" )] dfa::dense::DFA<Vec<u32>>, |
1250 | #[cfg (not(feature = "dfa-build" ))] (), |
1251 | ); |
1252 | |
1253 | impl ReverseDFAEngine { |
1254 | pub(crate) fn new( |
1255 | info: &RegexInfo, |
1256 | nfarev: &NFA, |
1257 | ) -> Option<ReverseDFAEngine> { |
1258 | #[cfg (feature = "dfa-build" )] |
1259 | { |
1260 | if !info.config().get_dfa() { |
1261 | return None; |
1262 | } |
1263 | // If our NFA is anything but small, don't even bother with a DFA. |
1264 | if let Some(state_limit) = info.config().get_dfa_state_limit() { |
1265 | if nfarev.states().len() > state_limit { |
1266 | debug!( |
1267 | "skipping full reverse DFA because NFA has {} states, \ |
1268 | which exceeds the heuristic limit of {}" , |
1269 | nfarev.states().len(), |
1270 | state_limit, |
1271 | ); |
1272 | return None; |
1273 | } |
1274 | } |
1275 | // We cut the size limit in two because the total heap used by DFA |
1276 | // construction is determinization aux memory and the DFA itself, |
1277 | // and those things are configured independently in the lower level |
1278 | // DFA builder API. |
1279 | let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2); |
1280 | // Since we only use this for reverse searches, we can hard-code |
1281 | // a number of things like match semantics, prefilters, starts |
1282 | // for each pattern and so on. We also disable acceleration since |
1283 | // it's incompatible with limited searches (which is the only |
1284 | // operation we support for this kind of engine at the moment). |
1285 | let dfa_config = dfa::dense::Config::new() |
1286 | .match_kind(MatchKind::All) |
1287 | .prefilter(None) |
1288 | .accelerate(false) |
1289 | .start_kind(dfa::StartKind::Anchored) |
1290 | .starts_for_each_pattern(false) |
1291 | .byte_classes(info.config().get_byte_classes()) |
1292 | .unicode_word_boundary(true) |
1293 | .specialize_start_states(false) |
1294 | .determinize_size_limit(size_limit) |
1295 | .dfa_size_limit(size_limit); |
1296 | let result = dfa::dense::Builder::new() |
1297 | .configure(dfa_config) |
1298 | .build_from_nfa(&nfarev); |
1299 | let rev = match result { |
1300 | Ok(rev) => rev, |
1301 | Err(_err) => { |
1302 | debug!("full reverse DFA failed to build: {}" , _err); |
1303 | return None; |
1304 | } |
1305 | }; |
1306 | debug!( |
1307 | "fully compiled reverse DFA built, {} bytes" , |
1308 | rev.memory_usage() |
1309 | ); |
1310 | Some(ReverseDFAEngine(rev)) |
1311 | } |
1312 | #[cfg (not(feature = "dfa-build" ))] |
1313 | { |
1314 | None |
1315 | } |
1316 | } |
1317 | |
1318 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
1319 | pub(crate) fn try_search_half_rev_limited( |
1320 | &self, |
1321 | input: &Input<'_>, |
1322 | min_start: usize, |
1323 | ) -> Result<Option<HalfMatch>, RetryError> { |
1324 | #[cfg (feature = "dfa-build" )] |
1325 | { |
1326 | let dfa = &self.0; |
1327 | crate::meta::limited::dfa_try_search_half_rev( |
1328 | dfa, input, min_start, |
1329 | ) |
1330 | } |
1331 | #[cfg (not(feature = "dfa-build" ))] |
1332 | { |
1333 | // Impossible to reach because this engine is never constructed |
1334 | // if the requisite features aren't enabled. |
1335 | unreachable!() |
1336 | } |
1337 | } |
1338 | |
1339 | pub(crate) fn memory_usage(&self) -> usize { |
1340 | #[cfg (feature = "dfa-build" )] |
1341 | { |
1342 | self.0.memory_usage() |
1343 | } |
1344 | #[cfg (not(feature = "dfa-build" ))] |
1345 | { |
1346 | // Impossible to reach because this engine is never constructed |
1347 | // if the requisite features aren't enabled. |
1348 | unreachable!() |
1349 | } |
1350 | } |
1351 | } |
1352 | |