1 | use crate::{ |
2 | hybrid::{ |
3 | dfa::{Cache, OverlappingState, DFA}, |
4 | id::LazyStateID, |
5 | }, |
6 | util::{ |
7 | prefilter::Prefilter, |
8 | search::{HalfMatch, Input, MatchError, Span}, |
9 | }, |
10 | }; |
11 | |
#[inline(never)]
13 | pub(crate) fn find_fwd( |
14 | dfa: &DFA, |
15 | cache: &mut Cache, |
16 | input: &Input<'_>, |
17 | ) -> Result<Option<HalfMatch>, MatchError> { |
18 | if input.is_done() { |
19 | return Ok(None); |
20 | } |
21 | let pre = if input.get_anchored().is_anchored() { |
22 | None |
23 | } else { |
24 | dfa.get_config().get_prefilter() |
25 | }; |
26 | // So what we do here is specialize four different versions of 'find_fwd': |
    // one for each combination of 'has prefilter' and 'is earliest
28 | // search'. The reason for doing this is that both of these things require |
29 | // branches and special handling in some code that can be very hot, |
30 | // and shaving off as much as we can when we don't need it tends to be |
31 | // beneficial in ad hoc benchmarks. To see these differences, you often |
32 | // need a query with a high match count. In other words, specializing these |
33 | // four routines *tends* to help latency more than throughput. |
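    // (The specialization itself comes from passing literal 'true'/'false'
    // and 'None' values to a helper that is, when the 'perf-inline' feature
    // is enabled, marked inline(always): after inlining and constant
    // propagation, each call below should effectively compile down to its
    // own streamlined copy of the search loop.)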
34 | if pre.is_some() { |
35 | if input.get_earliest() { |
36 | find_fwd_imp(dfa, cache, input, pre, true) |
37 | } else { |
38 | find_fwd_imp(dfa, cache, input, pre, false) |
39 | } |
40 | } else { |
41 | if input.get_earliest() { |
42 | find_fwd_imp(dfa, cache, input, None, true) |
43 | } else { |
44 | find_fwd_imp(dfa, cache, input, None, false) |
45 | } |
46 | } |
47 | } |
48 | |
#[cfg_attr(feature = "perf-inline", inline(always))]
50 | fn find_fwd_imp( |
51 | dfa: &DFA, |
52 | cache: &mut Cache, |
53 | input: &Input<'_>, |
54 | pre: Option<&'_ Prefilter>, |
55 | earliest: bool, |
56 | ) -> Result<Option<HalfMatch>, MatchError> { |
57 | // See 'prefilter_restart' docs for explanation. |
58 | let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty(); |
59 | let mut mat = None; |
60 | let mut sid = init_fwd(dfa, cache, input)?; |
61 | let mut at = input.start(); |
62 | // This could just be a closure, but then I think it would be unsound |
63 | // because it would need to be safe to invoke. This way, the lack of safety |
64 | // is clearer in the code below. |
65 | macro_rules! next_unchecked { |
66 | ($sid:expr, $at:expr) => {{ |
67 | let byte = *input.haystack().get_unchecked($at); |
68 | dfa.next_state_untagged_unchecked(cache, $sid, byte) |
69 | }}; |
70 | } |
71 | |
72 | if let Some(ref pre) = pre { |
73 | let span = Span::from(at..input.end()); |
74 | match pre.find(input.haystack(), span) { |
75 | None => return Ok(mat), |
76 | Some(ref span) => { |
77 | at = span.start; |
78 | if !universal_start { |
79 | sid = prefilter_restart(dfa, cache, &input, at)?; |
80 | } |
81 | } |
82 | } |
83 | } |
84 | cache.search_start(at); |
85 | while at < input.end() { |
86 | if sid.is_tagged() { |
87 | cache.search_update(at); |
88 | sid = dfa |
89 | .next_state(cache, sid, input.haystack()[at]) |
90 | .map_err(|_| gave_up(at))?; |
91 | } else { |
92 | // SAFETY: There are two safety invariants we need to uphold |
93 | // here in the loops below: that 'sid' and 'prev_sid' are valid |
94 | // state IDs for this DFA, and that 'at' is a valid index into |
95 | // 'haystack'. For the former, we rely on the invariant that |
            // next_state* and start_state_forward always return a valid state
97 | // ID (given a valid state ID in the former case), and that we are |
98 | // only at this place in the code if 'sid' is untagged. Moreover, |
99 | // every call to next_state_untagged_unchecked below is guarded by |
100 | // a check that sid is untagged. For the latter safety invariant, |
101 | // we always guard unchecked access with a check that 'at' is less |
102 | // than 'end', where 'end <= haystack.len()'. In the unrolled loop |
103 | // below, we ensure that 'at' is always in bounds. |
104 | // |
105 | // PERF: For justification of omitting bounds checks, it gives us a |
106 | // ~10% bump in search time. This was used for a benchmark: |
107 | // |
108 | // regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile |
109 | // |
110 | // PERF: For justification for the loop unrolling, we use a few |
111 | // different tests: |
112 | // |
113 | // regex-cli find half hybrid -p '\w{50}' -UBb bigfile |
114 | // regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile |
115 | // regex-cli find half hybrid -p 'ZQZQZQZQ' -UBb bigfile |
116 | // |
117 | // And there are three different configurations: |
118 | // |
119 | // nounroll: this entire 'else' block vanishes and we just |
120 | // always use 'dfa.next_state(..)'. |
121 | // unroll1: just the outer loop below |
122 | // unroll2: just the inner loop below |
123 | // unroll3: both the outer and inner loops below |
124 | // |
125 | // This results in a matrix of timings for each of the above |
126 | // regexes with each of the above unrolling configurations: |
127 | // |
128 | // '\w{50}' '(?m)^.+$' 'ZQZQZQZQ' |
129 | // nounroll 1.51s 2.34s 1.51s |
130 | // unroll1 1.53s 2.32s 1.56s |
131 | // unroll2 2.22s 1.50s 0.61s |
132 | // unroll3 1.67s 1.45s 0.61s |
133 | // |
134 | // Ideally we'd be able to find a configuration that yields the |
135 | // best time for all regexes, but alas we settle for unroll3 that |
136 | // gives us *almost* the best for '\w{50}' and the best for the |
137 | // other two regexes. |
138 | // |
139 | // So what exactly is going on here? The first unrolling (grouping |
140 | // together runs of untagged transitions) specifically targets |
141 | // our choice of representation. The second unrolling (grouping |
142 | // together runs of self-transitions) specifically targets a common |
143 | // DFA topology. Let's dig in a little bit by looking at our |
144 | // regexes: |
145 | // |
146 | // '\w{50}': This regex spends a lot of time outside of the DFA's |
147 | // start state matching some part of the '\w' repetition. This |
148 | // means that it's a bit of a worst case for loop unrolling that |
149 | // targets self-transitions since the self-transitions in '\w{50}' |
150 | // are not particularly active for this haystack. However, the |
151 | // first unrolling (grouping together untagged transitions) |
152 | // does apply quite well here since very few transitions hit |
153 | // match/dead/quit/unknown states. It is however worth mentioning |
154 | // that if start states are configured to be tagged (which you |
155 | // typically want to do if you have a prefilter), then this regex |
156 | // actually slows way down because it is constantly ping-ponging |
157 | // out of the unrolled loop and into the handling of a tagged start |
158 | // state below. But when start states aren't tagged, the unrolled |
159 | // loop stays hot. (This is why it's imperative that start state |
160 | // tagging be disabled when there isn't a prefilter!) |
161 | // |
162 | // '(?m)^.+$': There are two important aspects of this regex: 1) |
163 | // on this haystack, its match count is very high, much higher |
            // than the other two regexes, and 2) it spends the vast majority
165 | // of its time matching '.+'. Since Unicode mode is disabled, |
166 | // this corresponds to repeatedly following self transitions for |
167 | // the vast majority of the input. This does benefit from the |
168 | // untagged unrolling since most of the transitions will be to |
169 | // untagged states, but the untagged unrolling does more work than |
170 | // what is actually required. Namely, it has to keep track of the |
171 | // previous and next state IDs, which I guess requires a bit more |
172 | // shuffling. This is supported by the fact that nounroll+unroll1 |
173 | // are both slower than unroll2+unroll3, where the latter has a |
174 | // loop unrolling that specifically targets self-transitions. |
175 | // |
176 | // 'ZQZQZQZQ': This one is very similar to '(?m)^.+$' because it |
177 | // spends the vast majority of its time in self-transitions for |
178 | // the (implicit) unanchored prefix. The main difference with |
179 | // '(?m)^.+$' is that it has a much lower match count. So there |
180 | // isn't much time spent in the overhead of reporting matches. This |
            // is the primary explanation for the perf difference here. We include
182 | // this regex and the former to make sure we have comparison points |
183 | // with high and low match counts. |
184 | // |
185 | // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'. |
186 | // |
187 | // NOTE: In a follow-up, it turns out that the "inner" loop |
188 | // mentioned above was a pretty big pessimization in some other |
            // cases. Namely, it caused too much ping-ponging into and out
            // of the loop, which resulted in nearly 2x regressions in search
            // time when compared to the original lazy DFA in the regex crate.
192 | // So I've removed the second loop unrolling that targets the |
193 | // self-transition case. |
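            //
            // One more note on the shape of the unrolled loop below: each
            // unchecked transition alternates which variable it writes to
            // ('prev_sid' vs 'sid'), and the swaps on break preserve that
            // pairing. So whenever we leave the loop, 'sid' is the state we
            // just computed and 'prev_sid' is its predecessor, which is
            // exactly what the 'is_unknown' fix-up below needs in order to
            // re-run the transition via 'next_state'.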
194 | let mut prev_sid = sid; |
195 | while at < input.end() { |
196 | prev_sid = unsafe { next_unchecked!(sid, at) }; |
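                // The 'at + 3' guard below ensures that the three 'at += 1'
                // bumps in the remainder of this iteration keep 'at'
                // strictly below 'end', so the unchecked haystack reads
                // that follow stay in bounds.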
197 | if prev_sid.is_tagged() || at + 3 >= input.end() { |
198 | core::mem::swap(&mut prev_sid, &mut sid); |
199 | break; |
200 | } |
201 | at += 1; |
202 | |
203 | sid = unsafe { next_unchecked!(prev_sid, at) }; |
204 | if sid.is_tagged() { |
205 | break; |
206 | } |
207 | at += 1; |
208 | |
209 | prev_sid = unsafe { next_unchecked!(sid, at) }; |
210 | if prev_sid.is_tagged() { |
211 | core::mem::swap(&mut prev_sid, &mut sid); |
212 | break; |
213 | } |
214 | at += 1; |
215 | |
216 | sid = unsafe { next_unchecked!(prev_sid, at) }; |
217 | if sid.is_tagged() { |
218 | break; |
219 | } |
220 | at += 1; |
221 | } |
222 | // If we quit out of the code above with an unknown state ID at |
223 | // any point, then we need to re-compute that transition using |
224 | // 'next_state', which will do NFA powerset construction for us. |
225 | if sid.is_unknown() { |
226 | cache.search_update(at); |
227 | sid = dfa |
228 | .next_state(cache, prev_sid, input.haystack()[at]) |
229 | .map_err(|_| gave_up(at))?; |
230 | } |
231 | } |
232 | if sid.is_tagged() { |
233 | if sid.is_start() { |
234 | if let Some(ref pre) = pre { |
235 | let span = Span::from(at..input.end()); |
236 | match pre.find(input.haystack(), span) { |
237 | None => { |
238 | cache.search_finish(span.end); |
239 | return Ok(mat); |
240 | } |
241 | Some(ref span) => { |
242 | // We want to skip any update to 'at' below |
243 | // at the end of this iteration and just |
244 | // jump immediately back to the next state |
245 | // transition at the leading position of the |
246 | // candidate match. |
247 | // |
248 | // ... but only if we actually made progress |
249 | // with our prefilter, otherwise if the start |
250 | // state has a self-loop, we can get stuck. |
251 | if span.start > at { |
252 | at = span.start; |
253 | if !universal_start { |
254 | sid = prefilter_restart( |
255 | dfa, cache, &input, at, |
256 | )?; |
257 | } |
258 | continue; |
259 | } |
260 | } |
261 | } |
262 | } |
263 | } else if sid.is_match() { |
264 | let pattern = dfa.match_pattern(cache, sid, 0); |
265 | // Since slice ranges are inclusive at the beginning and |
266 | // exclusive at the end, and since forward searches report |
267 | // the end, we can return 'at' as-is. This only works because |
268 | // matches are delayed by 1 byte. So by the time we observe a |
269 | // match, 'at' has already been set to 1 byte past the actual |
270 | // match location, which is precisely the exclusive ending |
271 | // bound of the match. |
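                // As a (hypothetical) illustration: for a pattern like
                // 'abc' against the haystack "zabcz", the byte 'c' at
                // offset 3 completes the match, but because of the 1 byte
                // delay, the match state is only observed after the
                // transition on the byte at offset 4. At that point 'at'
                // is 4, which is precisely the exclusive end of the match
                // spanning 1..4.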
272 | mat = Some(HalfMatch::new(pattern, at)); |
273 | if earliest { |
274 | cache.search_finish(at); |
275 | return Ok(mat); |
276 | } |
277 | } else if sid.is_dead() { |
278 | cache.search_finish(at); |
279 | return Ok(mat); |
280 | } else if sid.is_quit() { |
281 | cache.search_finish(at); |
282 | return Err(MatchError::quit(input.haystack()[at], at)); |
283 | } else { |
284 | debug_assert!(sid.is_unknown()); |
                unreachable!("sid being unknown is a bug");
286 | } |
287 | } |
288 | at += 1; |
289 | } |
290 | eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?; |
291 | cache.search_finish(input.end()); |
292 | Ok(mat) |
293 | } |
294 | |
#[inline(never)]
296 | pub(crate) fn find_rev( |
297 | dfa: &DFA, |
298 | cache: &mut Cache, |
299 | input: &Input<'_>, |
300 | ) -> Result<Option<HalfMatch>, MatchError> { |
301 | if input.is_done() { |
302 | return Ok(None); |
303 | } |
304 | if input.get_earliest() { |
305 | find_rev_imp(dfa, cache, input, true) |
306 | } else { |
307 | find_rev_imp(dfa, cache, input, false) |
308 | } |
309 | } |
310 | |
#[cfg_attr(feature = "perf-inline", inline(always))]
312 | fn find_rev_imp( |
313 | dfa: &DFA, |
314 | cache: &mut Cache, |
315 | input: &Input<'_>, |
316 | earliest: bool, |
317 | ) -> Result<Option<HalfMatch>, MatchError> { |
318 | let mut mat = None; |
319 | let mut sid = init_rev(dfa, cache, input)?; |
320 | // In reverse search, the loop below can't handle the case of searching an |
321 | // empty slice. Ideally we could write something congruent to the forward |
322 | // search, i.e., 'while at >= start', but 'start' might be 0. Since we use |
323 | // an unsigned offset, 'at >= 0' is trivially always true. We could avoid |
324 | // this extra case handling by using a signed offset, but Rust makes it |
325 | // annoying to do. So... We just handle the empty case separately. |
326 | if input.start() == input.end() { |
327 | eoi_rev(dfa, cache, input, &mut sid, &mut mat)?; |
328 | return Ok(mat); |
329 | } |
330 | |
331 | let mut at = input.end() - 1; |
332 | macro_rules! next_unchecked { |
333 | ($sid:expr, $at:expr) => {{ |
334 | let byte = *input.haystack().get_unchecked($at); |
335 | dfa.next_state_untagged_unchecked(cache, $sid, byte) |
336 | }}; |
337 | } |
338 | cache.search_start(at); |
339 | loop { |
340 | if sid.is_tagged() { |
341 | cache.search_update(at); |
342 | sid = dfa |
343 | .next_state(cache, sid, input.haystack()[at]) |
344 | .map_err(|_| gave_up(at))?; |
345 | } else { |
346 | // SAFETY: See comments in 'find_fwd' for a safety argument. |
347 | // |
348 | // PERF: The comments in 'find_fwd' also provide a justification |
349 | // from a performance perspective as to 1) why we elide bounds |
350 | // checks and 2) why we do a specialized version of unrolling |
351 | // below. The reverse search does have a slightly different |
352 | // consideration in that most reverse searches tend to be |
353 | // anchored and on shorter haystacks. However, this still makes a |
354 | // difference. Take this command for example: |
355 | // |
356 | // regex-cli find match hybrid -p '(?m)^.+$' -UBb bigfile |
357 | // |
            // (Notice that we use 'find match hybrid', not 'find half
            // hybrid' like in the justification for the forward direction.
            // The 'match' sub-command will find start-of-match and thus run
            // the reverse direction.)
362 | // |
363 | // Without unrolling below, the above command takes around 3.76s. |
364 | // But with the unrolling below, we get down to 2.55s. If we keep |
365 | // the unrolling but add in bounds checks, then we get 2.86s. |
366 | // |
367 | // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'. |
368 | let mut prev_sid = sid; |
369 | while at >= input.start() { |
370 | prev_sid = unsafe { next_unchecked!(sid, at) }; |
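                // As in 'find_fwd': bail out of the unrolled iteration
                // early enough that the three 'at -= 1' steps below cannot
                // move 'at' below 'input.start()'.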
371 | if prev_sid.is_tagged() |
372 | || at <= input.start().saturating_add(3) |
373 | { |
374 | core::mem::swap(&mut prev_sid, &mut sid); |
375 | break; |
376 | } |
377 | at -= 1; |
378 | |
379 | sid = unsafe { next_unchecked!(prev_sid, at) }; |
380 | if sid.is_tagged() { |
381 | break; |
382 | } |
383 | at -= 1; |
384 | |
385 | prev_sid = unsafe { next_unchecked!(sid, at) }; |
386 | if prev_sid.is_tagged() { |
387 | core::mem::swap(&mut prev_sid, &mut sid); |
388 | break; |
389 | } |
390 | at -= 1; |
391 | |
392 | sid = unsafe { next_unchecked!(prev_sid, at) }; |
393 | if sid.is_tagged() { |
394 | break; |
395 | } |
396 | at -= 1; |
397 | } |
398 | // If we quit out of the code above with an unknown state ID at |
399 | // any point, then we need to re-compute that transition using |
400 | // 'next_state', which will do NFA powerset construction for us. |
401 | if sid.is_unknown() { |
402 | cache.search_update(at); |
403 | sid = dfa |
404 | .next_state(cache, prev_sid, input.haystack()[at]) |
405 | .map_err(|_| gave_up(at))?; |
406 | } |
407 | } |
408 | if sid.is_tagged() { |
409 | if sid.is_start() { |
410 | // do nothing |
411 | } else if sid.is_match() { |
412 | let pattern = dfa.match_pattern(cache, sid, 0); |
                // Since matches are delayed by 1 byte, 'at' here points to
                // the position one byte *before* the true start of the
                // match. And since reverse searches report the starting
                // (inclusive) bound of a match, we add 1 to get it.
416 | mat = Some(HalfMatch::new(pattern, at + 1)); |
417 | if earliest { |
418 | cache.search_finish(at); |
419 | return Ok(mat); |
420 | } |
421 | } else if sid.is_dead() { |
422 | cache.search_finish(at); |
423 | return Ok(mat); |
424 | } else if sid.is_quit() { |
425 | cache.search_finish(at); |
426 | return Err(MatchError::quit(input.haystack()[at], at)); |
427 | } else { |
428 | debug_assert!(sid.is_unknown()); |
                unreachable!("sid being unknown is a bug");
430 | } |
431 | } |
432 | if at == input.start() { |
433 | break; |
434 | } |
435 | at -= 1; |
436 | } |
437 | cache.search_finish(input.start()); |
438 | eoi_rev(dfa, cache, input, &mut sid, &mut mat)?; |
439 | Ok(mat) |
440 | } |
441 | |
#[inline(never)]
443 | pub(crate) fn find_overlapping_fwd( |
444 | dfa: &DFA, |
445 | cache: &mut Cache, |
446 | input: &Input<'_>, |
447 | state: &mut OverlappingState, |
448 | ) -> Result<(), MatchError> { |
449 | state.mat = None; |
450 | if input.is_done() { |
451 | return Ok(()); |
452 | } |
453 | let pre = if input.get_anchored().is_anchored() { |
454 | None |
455 | } else { |
456 | dfa.get_config().get_prefilter() |
457 | }; |
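    // As in 'find_fwd', passing a literal 'None' in the no-prefilter case
    // (rather than 'pre', which happens to be 'None' anyway) is presumably
    // meant to let the inlined copy drop its prefilter branches entirely.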
458 | if pre.is_some() { |
459 | find_overlapping_fwd_imp(dfa, cache, input, pre, state) |
460 | } else { |
461 | find_overlapping_fwd_imp(dfa, cache, input, None, state) |
462 | } |
463 | } |
464 | |
#[cfg_attr(feature = "perf-inline", inline(always))]
466 | fn find_overlapping_fwd_imp( |
467 | dfa: &DFA, |
468 | cache: &mut Cache, |
469 | input: &Input<'_>, |
470 | pre: Option<&'_ Prefilter>, |
471 | state: &mut OverlappingState, |
472 | ) -> Result<(), MatchError> { |
473 | // See 'prefilter_restart' docs for explanation. |
474 | let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty(); |
475 | let mut sid = match state.id { |
476 | None => { |
477 | state.at = input.start(); |
478 | init_fwd(dfa, cache, input)? |
479 | } |
480 | Some(sid) => { |
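            // Getting here means a previous call found at least one match
            // at 'state.at'. 'next_match_index' tracks how many of the
            // patterns matching at that position have been reported so far,
            // so report the next one (if any remain) before resuming the
            // scan.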
481 | if let Some(match_index) = state.next_match_index { |
482 | let match_len = dfa.match_len(cache, sid); |
483 | if match_index < match_len { |
484 | state.next_match_index = Some(match_index + 1); |
485 | let pattern = dfa.match_pattern(cache, sid, match_index); |
486 | state.mat = Some(HalfMatch::new(pattern, state.at)); |
487 | return Ok(()); |
488 | } |
489 | } |
490 | // Once we've reported all matches at a given position, we need to |
491 | // advance the search to the next position. |
492 | state.at += 1; |
493 | if state.at > input.end() { |
494 | return Ok(()); |
495 | } |
496 | sid |
497 | } |
498 | }; |
499 | |
500 | // NOTE: We don't optimize the crap out of this routine primarily because |
501 | // it seems like most overlapping searches will have higher match counts, |
502 | // and thus, throughput is perhaps not as important. But if you have a use |
503 | // case for something faster, feel free to file an issue. |
504 | cache.search_start(state.at); |
505 | while state.at < input.end() { |
506 | sid = dfa |
507 | .next_state(cache, sid, input.haystack()[state.at]) |
508 | .map_err(|_| gave_up(state.at))?; |
509 | if sid.is_tagged() { |
510 | state.id = Some(sid); |
511 | if sid.is_start() { |
512 | if let Some(ref pre) = pre { |
513 | let span = Span::from(state.at..input.end()); |
514 | match pre.find(input.haystack(), span) { |
515 | None => return Ok(()), |
516 | Some(ref span) => { |
517 | if span.start > state.at { |
518 | state.at = span.start; |
519 | if !universal_start { |
520 | sid = prefilter_restart( |
521 | dfa, cache, &input, state.at, |
522 | )?; |
523 | } |
524 | continue; |
525 | } |
526 | } |
527 | } |
528 | } |
529 | } else if sid.is_match() { |
530 | state.next_match_index = Some(1); |
531 | let pattern = dfa.match_pattern(cache, sid, 0); |
532 | state.mat = Some(HalfMatch::new(pattern, state.at)); |
533 | cache.search_finish(state.at); |
534 | return Ok(()); |
535 | } else if sid.is_dead() { |
536 | cache.search_finish(state.at); |
537 | return Ok(()); |
538 | } else if sid.is_quit() { |
539 | cache.search_finish(state.at); |
540 | return Err(MatchError::quit( |
541 | input.haystack()[state.at], |
542 | state.at, |
543 | )); |
544 | } else { |
545 | debug_assert!(sid.is_unknown()); |
                unreachable!("sid being unknown is a bug");
547 | } |
548 | } |
549 | state.at += 1; |
550 | cache.search_update(state.at); |
551 | } |
552 | |
553 | let result = eoi_fwd(dfa, cache, input, &mut sid, &mut state.mat); |
554 | state.id = Some(sid); |
555 | if state.mat.is_some() { |
556 | // '1' is always correct here since if we get to this point, this |
557 | // always corresponds to the first (index '0') match discovered at |
558 | // this position. So the next match to report at this position (if |
559 | // it exists) is at index '1'. |
560 | state.next_match_index = Some(1); |
561 | } |
562 | cache.search_finish(input.end()); |
563 | result |
564 | } |
565 | |
#[inline(never)]
567 | pub(crate) fn find_overlapping_rev( |
568 | dfa: &DFA, |
569 | cache: &mut Cache, |
570 | input: &Input<'_>, |
571 | state: &mut OverlappingState, |
572 | ) -> Result<(), MatchError> { |
573 | state.mat = None; |
574 | if input.is_done() { |
575 | return Ok(()); |
576 | } |
577 | let mut sid = match state.id { |
578 | None => { |
579 | let sid = init_rev(dfa, cache, input)?; |
580 | state.id = Some(sid); |
581 | if input.start() == input.end() { |
582 | state.rev_eoi = true; |
583 | } else { |
584 | state.at = input.end() - 1; |
585 | } |
586 | sid |
587 | } |
588 | Some(sid) => { |
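            // As in the forward overlapping search: report any remaining
            // pattern matches at 'state.at' before resuming the scan.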
589 | if let Some(match_index) = state.next_match_index { |
590 | let match_len = dfa.match_len(cache, sid); |
591 | if match_index < match_len { |
592 | state.next_match_index = Some(match_index + 1); |
593 | let pattern = dfa.match_pattern(cache, sid, match_index); |
594 | state.mat = Some(HalfMatch::new(pattern, state.at)); |
595 | return Ok(()); |
596 | } |
597 | } |
598 | // Once we've reported all matches at a given position, we need |
599 | // to advance the search to the next position. However, if we've |
600 | // already followed the EOI transition, then we know we're done |
601 | // with the search and there cannot be any more matches to report. |
602 | if state.rev_eoi { |
603 | return Ok(()); |
604 | } else if state.at == input.start() { |
605 | // At this point, we should follow the EOI transition. This |
                // will cause us to skip the main loop below and fall through
607 | // to the final 'eoi_rev' transition. |
608 | state.rev_eoi = true; |
609 | } else { |
610 | // We haven't hit the end of the search yet, so move on. |
611 | state.at -= 1; |
612 | } |
613 | sid |
614 | } |
615 | }; |
616 | cache.search_start(state.at); |
617 | while !state.rev_eoi { |
618 | sid = dfa |
619 | .next_state(cache, sid, input.haystack()[state.at]) |
620 | .map_err(|_| gave_up(state.at))?; |
621 | if sid.is_tagged() { |
622 | state.id = Some(sid); |
623 | if sid.is_start() { |
624 | // do nothing |
625 | } else if sid.is_match() { |
626 | state.next_match_index = Some(1); |
627 | let pattern = dfa.match_pattern(cache, sid, 0); |
628 | state.mat = Some(HalfMatch::new(pattern, state.at + 1)); |
629 | cache.search_finish(state.at); |
630 | return Ok(()); |
631 | } else if sid.is_dead() { |
632 | cache.search_finish(state.at); |
633 | return Ok(()); |
634 | } else if sid.is_quit() { |
635 | cache.search_finish(state.at); |
636 | return Err(MatchError::quit( |
637 | input.haystack()[state.at], |
638 | state.at, |
639 | )); |
640 | } else { |
641 | debug_assert!(sid.is_unknown()); |
                unreachable!("sid being unknown is a bug");
643 | } |
644 | } |
645 | if state.at == input.start() { |
646 | break; |
647 | } |
648 | state.at -= 1; |
649 | cache.search_update(state.at); |
650 | } |
651 | |
652 | let result = eoi_rev(dfa, cache, input, &mut sid, &mut state.mat); |
653 | state.rev_eoi = true; |
654 | state.id = Some(sid); |
655 | if state.mat.is_some() { |
656 | // '1' is always correct here since if we get to this point, this |
657 | // always corresponds to the first (index '0') match discovered at |
658 | // this position. So the next match to report at this position (if |
659 | // it exists) is at index '1'. |
660 | state.next_match_index = Some(1); |
661 | } |
662 | cache.search_finish(input.start()); |
663 | result |
664 | } |
665 | |
#[cfg_attr(feature = "perf-inline", inline(always))]
667 | fn init_fwd( |
668 | dfa: &DFA, |
669 | cache: &mut Cache, |
670 | input: &Input<'_>, |
671 | ) -> Result<LazyStateID, MatchError> { |
672 | let sid = dfa.start_state_forward(cache, input)?; |
673 | // Start states can never be match states, since all matches are delayed |
674 | // by 1 byte. |
675 | debug_assert!(!sid.is_match()); |
676 | Ok(sid) |
677 | } |
678 | |
#[cfg_attr(feature = "perf-inline", inline(always))]
680 | fn init_rev( |
681 | dfa: &DFA, |
682 | cache: &mut Cache, |
683 | input: &Input<'_>, |
684 | ) -> Result<LazyStateID, MatchError> { |
685 | let sid = dfa.start_state_reverse(cache, input)?; |
686 | // Start states can never be match states, since all matches are delayed |
687 | // by 1 byte. |
688 | debug_assert!(!sid.is_match()); |
689 | Ok(sid) |
690 | } |
691 | |
#[cfg_attr(feature = "perf-inline", inline(always))]
693 | fn eoi_fwd( |
694 | dfa: &DFA, |
695 | cache: &mut Cache, |
696 | input: &Input<'_>, |
697 | sid: &mut LazyStateID, |
698 | mat: &mut Option<HalfMatch>, |
699 | ) -> Result<(), MatchError> { |
700 | let sp = input.get_span(); |
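    // If the span ends before the end of the haystack, the final transition
    // is taken on the first byte past the span rather than on "EOI". That
    // way, look-around at the span boundary (e.g. '\b' or '$') resolves
    // against the actual surrounding context instead of treating the span's
    // end as the end of input.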
701 | match input.haystack().get(sp.end) { |
702 | Some(&b) => { |
703 | *sid = |
704 | dfa.next_state(cache, *sid, b).map_err(|_| gave_up(sp.end))?; |
705 | if sid.is_match() { |
706 | let pattern = dfa.match_pattern(cache, *sid, 0); |
707 | *mat = Some(HalfMatch::new(pattern, sp.end)); |
708 | } else if sid.is_quit() { |
709 | return Err(MatchError::quit(b, sp.end)); |
710 | } |
711 | } |
712 | None => { |
713 | *sid = dfa |
714 | .next_eoi_state(cache, *sid) |
715 | .map_err(|_| gave_up(input.haystack().len()))?; |
716 | if sid.is_match() { |
717 | let pattern = dfa.match_pattern(cache, *sid, 0); |
718 | *mat = Some(HalfMatch::new(pattern, input.haystack().len())); |
719 | } |
720 | // N.B. We don't have to check 'is_quit' here because the EOI |
721 | // transition can never lead to a quit state. |
722 | debug_assert!(!sid.is_quit()); |
723 | } |
724 | } |
725 | Ok(()) |
726 | } |
727 | |
#[cfg_attr(feature = "perf-inline", inline(always))]
729 | fn eoi_rev( |
730 | dfa: &DFA, |
731 | cache: &mut Cache, |
732 | input: &Input<'_>, |
733 | sid: &mut LazyStateID, |
734 | mat: &mut Option<HalfMatch>, |
735 | ) -> Result<(), MatchError> { |
736 | let sp = input.get_span(); |
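    // Mirror of 'eoi_fwd': if the span starts after the beginning of the
    // haystack, the final transition is taken on the byte just before the
    // span rather than on "EOI", so that look-around at the span boundary
    // (e.g. '\b' or '^') sees the actual surrounding context.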
737 | if sp.start > 0 { |
738 | let byte = input.haystack()[sp.start - 1]; |
739 | *sid = dfa |
740 | .next_state(cache, *sid, byte) |
741 | .map_err(|_| gave_up(sp.start))?; |
742 | if sid.is_match() { |
743 | let pattern = dfa.match_pattern(cache, *sid, 0); |
744 | *mat = Some(HalfMatch::new(pattern, sp.start)); |
745 | } else if sid.is_quit() { |
746 | return Err(MatchError::quit(byte, sp.start - 1)); |
747 | } |
748 | } else { |
749 | *sid = |
750 | dfa.next_eoi_state(cache, *sid).map_err(|_| gave_up(sp.start))?; |
751 | if sid.is_match() { |
752 | let pattern = dfa.match_pattern(cache, *sid, 0); |
753 | *mat = Some(HalfMatch::new(pattern, 0)); |
754 | } |
755 | // N.B. We don't have to check 'is_quit' here because the EOI |
756 | // transition can never lead to a quit state. |
757 | debug_assert!(!sid.is_quit()); |
758 | } |
759 | Ok(()) |
760 | } |
761 | |
762 | /// Re-compute the starting state that a DFA should be in after finding a |
763 | /// prefilter candidate match at the position `at`. |
764 | /// |
765 | /// It is always correct to call this, but not always necessary. Namely, |
766 | /// whenever the DFA has a universal start state, the DFA can remain in the |
767 | /// start state that it was in when it ran the prefilter. Why? Because in that |
768 | /// case, there is only one start state. |
769 | /// |
/// When does a DFA have a universal start state? In precisely the cases where
771 | /// it has no look-around assertions in its prefix. So for example, `\bfoo` |
772 | /// does not have a universal start state because the start state depends on |
773 | /// whether the byte immediately before the start position is a word byte or |
774 | /// not. However, `foo\b` does have a universal start state because the word |
775 | /// boundary does not appear in the pattern's prefix. |
776 | /// |
777 | /// So... most cases don't need this, but when a pattern doesn't have a |
778 | /// universal start state, then after a prefilter candidate has been found, the |
779 | /// current state *must* be re-litigated as if computing the start state at the |
780 | /// beginning of the search because it might change. That is, not all start |
781 | /// states are created equal. |
782 | /// |
783 | /// Why avoid it? Because while it's not super expensive, it isn't a trivial |
784 | /// operation to compute the start state. It is much better to avoid it and |
/// just stay in the current state if you know it to be correct.
#[cfg_attr(feature = "perf-inline", inline(always))]
787 | fn prefilter_restart( |
788 | dfa: &DFA, |
789 | cache: &mut Cache, |
790 | input: &Input<'_>, |
791 | at: usize, |
792 | ) -> Result<LazyStateID, MatchError> { |
793 | let mut input = input.clone(); |
794 | input.set_start(at); |
795 | init_fwd(dfa, cache, &input) |
796 | } |
797 | |
798 | /// A convenience routine for constructing a "gave up" match error. |
#[cfg_attr(feature = "perf-inline", inline(always))]
800 | fn gave_up(offset: usize) -> MatchError { |
801 | MatchError::gave_up(offset) |
802 | } |
803 | |