use crate::{
    hybrid::{
        dfa::{Cache, OverlappingState, DFA},
        id::LazyStateID,
    },
    util::{
        prefilter::Prefilter,
        search::{HalfMatch, Input, MatchError, Span},
    },
};

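/// Executes a standard forward search and returns the end of a match (as a
/// `HalfMatch`), if one exists.
///
/// This is a thin entry point that dispatches to one of four specialized
/// copies of `find_fwd_imp`. See the comment inside for why.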
#[inline(never)]
pub(crate) fn find_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<Option<HalfMatch>, MatchError> {
    if input.is_done() {
        return Ok(None);
    }
    let pre = if input.get_anchored().is_anchored() {
        None
    } else {
        dfa.get_config().get_prefilter()
    };
    // So what we do here is specialize four different versions of 'find_fwd':
    // one for each combination of 'has prefilter' and 'is earliest search'.
    // The reason for doing this is that both of these things require branches
    // and special handling in some code that can be very hot, and shaving off
    // as much as we can when we don't need it tends to be beneficial in ad
    // hoc benchmarks. To see these differences, you often need a query with a
    // high match count. In other words, specializing these four routines
    // *tends* to help latency more than throughput.
    if pre.is_some() {
        if input.get_earliest() {
            find_fwd_imp(dfa, cache, input, pre, true)
        } else {
            find_fwd_imp(dfa, cache, input, pre, false)
        }
    } else {
        if input.get_earliest() {
            find_fwd_imp(dfa, cache, input, None, true)
        } else {
            find_fwd_imp(dfa, cache, input, None, false)
        }
    }
}

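/// The guts of the forward search.
///
/// The prefilter and "earliest" mode are passed as parameters (rather than
/// read from `input` inside the hot loop) so that each of the four call
/// sites in `find_fwd` gets its own specialized copy of this routine.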
#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_fwd_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    pre: Option<&'_ Prefilter>,
    earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
    // See 'prefilter_restart' docs for explanation.
    let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
    let mut mat = None;
    let mut sid = init_fwd(dfa, cache, input)?;
    let mut at = input.start();
    // This could just be a closure, but then I think it would be unsound
    // because it would need to be safe to invoke. This way, the lack of
    // safety is clearer in the code below.
    macro_rules! next_unchecked {
        ($sid:expr, $at:expr) => {{
            let byte = *input.haystack().get_unchecked($at);
            dfa.next_state_untagged_unchecked(cache, $sid, byte)
        }};
    }

    if let Some(ref pre) = pre {
        let span = Span::from(at..input.end());
        match pre.find(input.haystack(), span) {
            None => return Ok(mat),
            Some(ref span) => {
                at = span.start;
                if !universal_start {
                    sid = prefilter_restart(dfa, cache, &input, at)?;
                }
            }
        }
    }
    cache.search_start(at);
    while at < input.end() {
        if sid.is_tagged() {
            cache.search_update(at);
            sid = dfa
                .next_state(cache, sid, input.haystack()[at])
                .map_err(|_| gave_up(at))?;
        } else {
            // SAFETY: There are two safety invariants we need to uphold
            // here in the loops below: that 'sid' and 'prev_sid' are valid
            // state IDs for this DFA, and that 'at' is a valid index into
            // 'haystack'. For the former, we rely on the invariant that
            // next_state* and start_state_forward always return a valid
            // state ID (given a valid state ID in the former case), and
            // that we are only at this place in the code if 'sid' is
            // untagged. Moreover, every call to next_state_untagged_unchecked
            // below is guarded by a check that sid is untagged. For the
            // latter safety invariant, we always guard unchecked access
            // with a check that 'at' is less than 'end', where 'end <=
            // haystack.len()'. In the unrolled loop below, we ensure that
            // 'at' is always in bounds.
            //
            // PERF: For justification of omitting bounds checks, it gives
            // us a ~10% improvement in search time. This was used for a
            // benchmark:
            //
            //    regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
            //
            // PERF: For justification for the loop unrolling, we use a few
            // different tests:
            //
            //    regex-cli find half hybrid -p '\w{50}' -UBb bigfile
            //    regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
            //    regex-cli find half hybrid -p 'ZQZQZQZQ' -UBb bigfile
            //
            // And there are three different configurations:
            //
            //    nounroll: this entire 'else' block vanishes and we just
            //              always use 'dfa.next_state(..)'.
            //    unroll1:  just the outer loop below
            //    unroll2:  just the inner loop below
            //    unroll3:  both the outer and inner loops below
            //
            // This results in a matrix of timings for each of the above
            // regexes with each of the above unrolling configurations:
            //
            //                '\w{50}'   '(?m)^.+$'   'ZQZQZQZQ'
            //    nounroll      1.51s        2.34s        1.51s
            //    unroll1       1.53s        2.32s        1.56s
            //    unroll2       2.22s        1.50s        0.61s
            //    unroll3       1.67s        1.45s        0.61s
            //
            // Ideally we'd be able to find a configuration that yields the
            // best time for all regexes, but alas we settle for unroll3,
            // which gives us *almost* the best for '\w{50}' and the best for
            // the other two regexes.
            //
            // So what exactly is going on here? The first unrolling (grouping
            // together runs of untagged transitions) specifically targets
            // our choice of representation. The second unrolling (grouping
            // together runs of self-transitions) specifically targets a
            // common DFA topology. Let's dig in a little bit by looking at
            // our regexes:
            //
            // '\w{50}': This regex spends a lot of time outside of the DFA's
            // start state matching some part of the '\w' repetition. This
            // means that it's a bit of a worst case for loop unrolling that
            // targets self-transitions, since the self-transitions in
            // '\w{50}' are not particularly active for this haystack.
            // However, the first unrolling (grouping together untagged
            // transitions) does apply quite well here, since very few
            // transitions hit match/dead/quit/unknown states. It is however
            // worth mentioning that if start states are configured to be
            // tagged (which you typically want to do if you have a
            // prefilter), then this regex actually slows way down, because
            // it is constantly ping-ponging out of the unrolled loop and
            // into the handling of a tagged start state below. But when
            // start states aren't tagged, the unrolled loop stays hot. (This
            // is why it's imperative that start state tagging be disabled
            // when there isn't a prefilter!)
            //
            // '(?m)^.+$': There are two important aspects of this regex: 1)
            // on this haystack, its match count is very high, much higher
            // than the other two regexes, and 2) it spends the vast majority
            // of its time matching '.+'. Since Unicode mode is disabled,
            // this corresponds to repeatedly following self-transitions for
            // the vast majority of the input. This does benefit from the
            // untagged unrolling, since most of the transitions will be to
            // untagged states, but the untagged unrolling does more work
            // than what is actually required. Namely, it has to keep track
            // of the previous and next state IDs, which I guess requires a
            // bit more shuffling. This is supported by the fact that
            // nounroll+unroll1 are both slower than unroll2+unroll3, where
            // the latter two have a loop unrolling that specifically targets
            // self-transitions.
            //
            // 'ZQZQZQZQ': This one is very similar to '(?m)^.+$' because it
            // spends the vast majority of its time in self-transitions for
            // the (implicit) unanchored prefix. The main difference with
            // '(?m)^.+$' is that it has a much lower match count, so there
            // isn't much time spent in the overhead of reporting matches.
            // This is the primary explainer of the perf difference here. We
            // include this regex and the former to make sure we have
            // comparison points with high and low match counts.
            //
            // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'.
            //
            // NOTE: In a follow-up, it turns out that the "inner" loop
            // mentioned above was a pretty big pessimization in some other
            // cases. Namely, it caused too much ping-ponging into and out of
            // the loop, which resulted in nearly 2x regressions in search
            // time when compared to the original lazy DFA in the regex
            // crate. So I've removed the second loop unrolling that targets
            // the self-transition case.
            let mut prev_sid = sid;
            while at < input.end() {
                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() || at + 3 >= input.end() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at += 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at += 1;

                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at += 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at += 1;
            }
            // If we quit out of the code above with an unknown state ID at
            // any point, then we need to re-compute that transition using
            // 'next_state', which will do NFA powerset construction for us.
            if sid.is_unknown() {
                cache.search_update(at);
                sid = dfa
                    .next_state(cache, prev_sid, input.haystack()[at])
                    .map_err(|_| gave_up(at))?;
            }
        }
        if sid.is_tagged() {
            if sid.is_start() {
                if let Some(ref pre) = pre {
                    let span = Span::from(at..input.end());
                    match pre.find(input.haystack(), span) {
                        None => {
                            cache.search_finish(span.end);
                            return Ok(mat);
                        }
                        Some(ref span) => {
                            // We want to skip any update to 'at' below at
                            // the end of this iteration and just jump
                            // immediately back to the next state transition
                            // at the leading position of the candidate
                            // match.
                            //
                            // ... but only if we actually made progress with
                            // our prefilter, otherwise if the start state
                            // has a self-loop, we can get stuck.
                            if span.start > at {
                                at = span.start;
                                if !universal_start {
                                    sid = prefilter_restart(
                                        dfa, cache, &input, at,
                                    )?;
                                }
                                continue;
                            }
                        }
                    }
                }
            } else if sid.is_match() {
                let pattern = dfa.match_pattern(cache, sid, 0);
                // Since slice ranges are inclusive at the beginning and
                // exclusive at the end, and since forward searches report
                // the end, we can return 'at' as-is. This only works because
                // matches are delayed by 1 byte. So by the time we observe a
                // match, 'at' has already been set to 1 byte past the actual
                // match location, which is precisely the exclusive ending
                // bound of the match.
                mat = Some(HalfMatch::new(pattern, at));
                if earliest {
                    cache.search_finish(at);
                    return Ok(mat);
                }
            } else if sid.is_dead() {
                cache.search_finish(at);
                return Ok(mat);
            } else if sid.is_quit() {
                cache.search_finish(at);
                return Err(MatchError::quit(input.haystack()[at], at));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        at += 1;
    }
    eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?;
    cache.search_finish(input.end());
    Ok(mat)
}

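/// Executes a reverse search (scanning the haystack from right to left) and
/// returns the start of a match (as a `HalfMatch`), if one exists. This is
/// typically used to find the starting offset of a match after a forward
/// search has found its end.
///
/// Like `find_fwd`, this dispatches to specialized copies of the search loop
/// based on whether the caller asked for the earliest match.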
#[inline(never)]
pub(crate) fn find_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<Option<HalfMatch>, MatchError> {
    if input.is_done() {
        return Ok(None);
    }
    if input.get_earliest() {
        find_rev_imp(dfa, cache, input, true)
    } else {
        find_rev_imp(dfa, cache, input, false)
    }
}

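/// The guts of the reverse search, specialized on whether the caller asked
/// for the earliest match.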
#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_rev_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
    let mut mat = None;
    let mut sid = init_rev(dfa, cache, input)?;
    // In reverse search, the loop below can't handle the case of searching an
    // empty slice. Ideally we could write something congruent to the forward
    // search, i.e., 'while at >= start', but 'start' might be 0. Since we use
    // an unsigned offset, 'at >= 0' is trivially always true. We could avoid
    // this extra case handling by using a signed offset, but Rust makes it
    // annoying to do. So... we just handle the empty case separately.
    if input.start() == input.end() {
        eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
        return Ok(mat);
    }

    let mut at = input.end() - 1;
    macro_rules! next_unchecked {
        ($sid:expr, $at:expr) => {{
            let byte = *input.haystack().get_unchecked($at);
            dfa.next_state_untagged_unchecked(cache, $sid, byte)
        }};
    }
    cache.search_start(at);
    loop {
        if sid.is_tagged() {
            cache.search_update(at);
            sid = dfa
                .next_state(cache, sid, input.haystack()[at])
                .map_err(|_| gave_up(at))?;
        } else {
            // SAFETY: See comments in 'find_fwd' for a safety argument.
            //
            // PERF: The comments in 'find_fwd' also provide a justification
            // from a performance perspective as to 1) why we elide bounds
            // checks and 2) why we do a specialized version of unrolling
            // below. The reverse search does have a slightly different
            // consideration in that most reverse searches tend to be
            // anchored and on shorter haystacks. However, this still makes a
            // difference. Take this command for example:
            //
            //    regex-cli find match hybrid -p '(?m)^.+$' -UBb bigfile
            //
            // (Notice that we use 'find match', not 'find half' like in the
            // justification for the forward direction. The 'match' command
            // needs the start of each match, and thus also runs the reverse
            // search.)
            //
            // Without unrolling below, the above command takes around 3.76s.
            // But with the unrolling below, we get down to 2.55s. If we keep
            // the unrolling but add in bounds checks, then we get 2.86s.
            //
            // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'.
            let mut prev_sid = sid;
            while at >= input.start() {
                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged()
                    || at <= input.start().saturating_add(3)
                {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at -= 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at -= 1;

                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at -= 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at -= 1;
            }
            // If we quit out of the code above with an unknown state ID at
            // any point, then we need to re-compute that transition using
            // 'next_state', which will do NFA powerset construction for us.
            if sid.is_unknown() {
                cache.search_update(at);
                sid = dfa
                    .next_state(cache, prev_sid, input.haystack()[at])
                    .map_err(|_| gave_up(at))?;
            }
        }
        if sid.is_tagged() {
            if sid.is_start() {
                // do nothing
            } else if sid.is_match() {
                let pattern = dfa.match_pattern(cache, sid, 0);
                // Since matches are delayed by 1 byte, observing a match
                // state at 'at' means the match actually begins at 'at + 1'.
                // And since reverse searches report the inclusive starting
                // offset of a match, 'at + 1' is precisely the offset we
                // want.
                mat = Some(HalfMatch::new(pattern, at + 1));
                if earliest {
                    cache.search_finish(at);
                    return Ok(mat);
                }
            } else if sid.is_dead() {
                cache.search_finish(at);
                return Ok(mat);
            } else if sid.is_quit() {
                cache.search_finish(at);
                return Err(MatchError::quit(input.haystack()[at], at));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        if at == input.start() {
            break;
        }
        at -= 1;
    }
    cache.search_finish(input.start());
    eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
    Ok(mat)
}

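/// Executes a forward overlapping search. Unlike `find_fwd`, this reports a
/// match for each pattern that matches at a given position. The search
/// resumes from the caller-provided `OverlappingState`, so repeated calls
/// continue where the previous call left off, and any match found is written
/// to `state.mat`.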
#[inline(never)]
pub(crate) fn find_overlapping_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    state.mat = None;
    if input.is_done() {
        return Ok(());
    }
    let pre = if input.get_anchored().is_anchored() {
        None
    } else {
        dfa.get_config().get_prefilter()
    };
    if pre.is_some() {
        find_overlapping_fwd_imp(dfa, cache, input, pre, state)
    } else {
        find_overlapping_fwd_imp(dfa, cache, input, None, state)
    }
}

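/// The guts of the forward overlapping search, specialized on whether a
/// prefilter is in use.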
#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_overlapping_fwd_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    pre: Option<&'_ Prefilter>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    // See 'prefilter_restart' docs for explanation.
    let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
    let mut sid = match state.id {
        None => {
            state.at = input.start();
            init_fwd(dfa, cache, input)?
        }
        Some(sid) => {
            if let Some(match_index) = state.next_match_index {
                let match_len = dfa.match_len(cache, sid);
                if match_index < match_len {
                    state.next_match_index = Some(match_index + 1);
                    let pattern = dfa.match_pattern(cache, sid, match_index);
                    state.mat = Some(HalfMatch::new(pattern, state.at));
                    return Ok(());
                }
            }
            // Once we've reported all matches at a given position, we need to
            // advance the search to the next position.
            state.at += 1;
            if state.at > input.end() {
                return Ok(());
            }
            sid
        }
    };

    // NOTE: We don't optimize the crap out of this routine primarily because
    // it seems like most overlapping searches will have higher match counts,
    // and thus, throughput is perhaps not as important. But if you have a use
    // case for something faster, feel free to file an issue.
    cache.search_start(state.at);
    while state.at < input.end() {
        sid = dfa
            .next_state(cache, sid, input.haystack()[state.at])
            .map_err(|_| gave_up(state.at))?;
        if sid.is_tagged() {
            state.id = Some(sid);
            if sid.is_start() {
                if let Some(ref pre) = pre {
                    let span = Span::from(state.at..input.end());
                    match pre.find(input.haystack(), span) {
                        None => return Ok(()),
                        Some(ref span) => {
                            if span.start > state.at {
                                state.at = span.start;
                                if !universal_start {
                                    sid = prefilter_restart(
                                        dfa, cache, &input, state.at,
                                    )?;
                                }
                                continue;
                            }
                        }
                    }
                }
            } else if sid.is_match() {
                state.next_match_index = Some(1);
                let pattern = dfa.match_pattern(cache, sid, 0);
                state.mat = Some(HalfMatch::new(pattern, state.at));
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_dead() {
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_quit() {
                cache.search_finish(state.at);
                return Err(MatchError::quit(
                    input.haystack()[state.at],
                    state.at,
                ));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        state.at += 1;
        cache.search_update(state.at);
    }

    let result = eoi_fwd(dfa, cache, input, &mut sid, &mut state.mat);
    state.id = Some(sid);
    if state.mat.is_some() {
        // '1' is always correct here since if we get to this point, this
        // always corresponds to the first (index '0') match discovered at
        // this position. So the next match to report at this position (if
        // it exists) is at index '1'.
        state.next_match_index = Some(1);
    }
    cache.search_finish(input.end());
    result
}

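/// Executes a reverse overlapping search. As with `find_overlapping_fwd`,
/// matches are reported one at a time via `state.mat`, and the search resumes
/// from the caller-provided `OverlappingState` on each call. Reported offsets
/// correspond to the starting positions of matches.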
#[inline(never)]
pub(crate) fn find_overlapping_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    state.mat = None;
    if input.is_done() {
        return Ok(());
    }
    let mut sid = match state.id {
        None => {
            let sid = init_rev(dfa, cache, input)?;
            state.id = Some(sid);
            if input.start() == input.end() {
                state.rev_eoi = true;
            } else {
                state.at = input.end() - 1;
            }
            sid
        }
        Some(sid) => {
            if let Some(match_index) = state.next_match_index {
                let match_len = dfa.match_len(cache, sid);
                if match_index < match_len {
                    state.next_match_index = Some(match_index + 1);
                    let pattern = dfa.match_pattern(cache, sid, match_index);
                    state.mat = Some(HalfMatch::new(pattern, state.at));
                    return Ok(());
                }
            }
            // Once we've reported all matches at a given position, we need
            // to advance the search to the next position. However, if we've
            // already followed the EOI transition, then we know we're done
            // with the search and there cannot be any more matches to report.
            if state.rev_eoi {
                return Ok(());
            } else if state.at == input.start() {
                // At this point, we should follow the EOI transition. This
                // will cause us to skip the main loop below and fall through
                // to the final 'eoi_rev' transition.
                state.rev_eoi = true;
            } else {
                // We haven't hit the end of the search yet, so move on.
                state.at -= 1;
            }
            sid
        }
    };
    cache.search_start(state.at);
    while !state.rev_eoi {
        sid = dfa
            .next_state(cache, sid, input.haystack()[state.at])
            .map_err(|_| gave_up(state.at))?;
        if sid.is_tagged() {
            state.id = Some(sid);
            if sid.is_start() {
                // do nothing
            } else if sid.is_match() {
                state.next_match_index = Some(1);
                let pattern = dfa.match_pattern(cache, sid, 0);
                state.mat = Some(HalfMatch::new(pattern, state.at + 1));
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_dead() {
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_quit() {
                cache.search_finish(state.at);
                return Err(MatchError::quit(
                    input.haystack()[state.at],
                    state.at,
                ));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        if state.at == input.start() {
            break;
        }
        state.at -= 1;
        cache.search_update(state.at);
    }

    let result = eoi_rev(dfa, cache, input, &mut sid, &mut state.mat);
    state.rev_eoi = true;
    state.id = Some(sid);
    if state.mat.is_some() {
        // '1' is always correct here since if we get to this point, this
        // always corresponds to the first (index '0') match discovered at
        // this position. So the next match to report at this position (if
        // it exists) is at index '1'.
        state.next_match_index = Some(1);
    }
    cache.search_finish(input.start());
    result
}

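/// Computes the start state for a forward search.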
#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<LazyStateID, MatchError> {
    let sid = dfa.start_state_forward(cache, input)?;
    // Start states can never be match states, since all matches are delayed
    // by 1 byte.
    debug_assert!(!sid.is_match());
    Ok(sid)
}

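/// Computes the start state for a reverse search.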
#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<LazyStateID, MatchError> {
    let sid = dfa.start_state_reverse(cache, input)?;
    // Start states can never be match states, since all matches are delayed
    // by 1 byte.
    debug_assert!(!sid.is_match());
    Ok(sid)
}

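/// Handles the end-of-input position for a forward search: transitions on
/// the byte at the end of the span (or on EOI when the span ends at the end
/// of the haystack) and records any match found. This final step is needed
/// because matches are delayed by 1 byte.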
#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    sid: &mut LazyStateID,
    mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
    let sp = input.get_span();
    match input.haystack().get(sp.end) {
        Some(&b) => {
            *sid =
                dfa.next_state(cache, *sid, b).map_err(|_| gave_up(sp.end))?;
            if sid.is_match() {
                let pattern = dfa.match_pattern(cache, *sid, 0);
                *mat = Some(HalfMatch::new(pattern, sp.end));
            } else if sid.is_quit() {
                return Err(MatchError::quit(b, sp.end));
            }
        }
        None => {
            *sid = dfa
                .next_eoi_state(cache, *sid)
                .map_err(|_| gave_up(input.haystack().len()))?;
            if sid.is_match() {
                let pattern = dfa.match_pattern(cache, *sid, 0);
                *mat = Some(HalfMatch::new(pattern, input.haystack().len()));
            }
            // N.B. We don't have to check 'is_quit' here because the EOI
            // transition can never lead to a quit state.
            debug_assert!(!sid.is_quit());
        }
    }
    Ok(())
}

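/// The reverse analog of `eoi_fwd`: transitions on the byte immediately
/// before the start of the span (or on EOI when the span starts at offset 0)
/// and records any match found.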
#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    sid: &mut LazyStateID,
    mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
    let sp = input.get_span();
    if sp.start > 0 {
        let byte = input.haystack()[sp.start - 1];
        *sid = dfa
            .next_state(cache, *sid, byte)
            .map_err(|_| gave_up(sp.start))?;
        if sid.is_match() {
            let pattern = dfa.match_pattern(cache, *sid, 0);
            *mat = Some(HalfMatch::new(pattern, sp.start));
        } else if sid.is_quit() {
            return Err(MatchError::quit(byte, sp.start - 1));
        }
    } else {
        *sid =
            dfa.next_eoi_state(cache, *sid).map_err(|_| gave_up(sp.start))?;
        if sid.is_match() {
            let pattern = dfa.match_pattern(cache, *sid, 0);
            *mat = Some(HalfMatch::new(pattern, 0));
        }
        // N.B. We don't have to check 'is_quit' here because the EOI
        // transition can never lead to a quit state.
        debug_assert!(!sid.is_quit());
    }
    Ok(())
}

/// Re-compute the starting state that a DFA should be in after finding a
/// prefilter candidate match at the position `at`.
///
/// It is always correct to call this, but not always necessary. Namely,
/// whenever the DFA has a universal start state, the DFA can remain in the
/// start state that it was in when it ran the prefilter. Why? Because in that
/// case, there is only one start state.
///
/// When does a DFA have a universal start state? In precisely the cases where
/// it has no look-around assertions in its prefix. So for example, `\bfoo`
/// does not have a universal start state because the start state depends on
/// whether the byte immediately before the start position is a word byte or
/// not. However, `foo\b` does have a universal start state because the word
/// boundary does not appear in the pattern's prefix.
///
/// So... most cases don't need this, but when a pattern doesn't have a
/// universal start state, then after a prefilter candidate has been found,
/// the current state *must* be re-litigated as if we were computing the start
/// state at the beginning of the search, because it might change. That is,
/// not all start states are created equal.
///
/// Why avoid it? Because while it's not super expensive, it isn't a trivial
/// operation to compute the start state. It is much better to avoid it and
/// just stay in the current state if you know it to be correct.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn prefilter_restart(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    at: usize,
) -> Result<LazyStateID, MatchError> {
    let mut input = input.clone();
    input.set_start(at);
    init_fwd(dfa, cache, &input)
}

/// A convenience routine for constructing a "gave up" match error.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn gave_up(offset: usize) -> MatchError {
    MatchError::gave_up(offset)
}
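
// What follows is a minimal smoke test sketching how the forward entry point
// above can be driven. It is an illustrative addition, not part of the
// original module: it assumes the `DFA::new` and `DFA::create_cache`
// constructors (gated on the crate's `syntax` feature) are available, and it
// relies on the "matches are delayed by 1 byte, so forward searches report
// the exclusive end offset" convention documented in `find_fwd_imp`.
#[cfg(all(test, feature = "syntax"))]
mod example_tests {
    use super::*;

    #[test]
    fn forward_search_reports_end_offset() {
        // Build a lazy DFA for a simple pattern and run an unanchored
        // forward search over a haystack where the only match is "abc"
        // at byte offsets 4..7.
        let dfa = DFA::new(r"[a-z]+").unwrap();
        let mut cache = dfa.create_cache();
        let input = Input::new("123 abc 456");
        let got = find_fwd(&dfa, &mut cache, &input).unwrap();
        // A forward search reports only the end of the match.
        assert_eq!(got.map(|hm| hm.offset()), Some(7));
    }
}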