use crate::{
    hybrid::{
        dfa::{Cache, OverlappingState, DFA},
        id::LazyStateID,
    },
    util::{
        prefilter::Prefilter,
        search::{HalfMatch, Input, MatchError, Span},
    },
};

#[inline(never)]
pub(crate) fn find_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<Option<HalfMatch>, MatchError> {
    if input.is_done() {
        return Ok(None);
    }
    let pre = if input.get_anchored().is_anchored() {
        None
    } else {
        dfa.get_config().get_prefilter()
    };
    // So what we do here is specialize four different versions of 'find_fwd':
    // one for each combination of 'has prefilter' and 'is earliest search'.
    // The reason for doing this is that both of these things require branches
    // and special handling in some code that can be very hot, and shaving off
    // as much as we can when we don't need it tends to be beneficial in ad
    // hoc benchmarks. To see these differences, you often need a query with a
    // high match count. In other words, specializing these four routines
    // *tends* to help latency more than throughput.
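    //
    // As a rough sketch of why passing literals helps (assuming the
    // 'inline(always)' on 'find_fwd_imp' actually takes effect): each call
    // below passes 'earliest' as a literal, so after inlining, constant
    // propagation can reduce the hot-path check
    //
    //     if earliest {
    //         cache.search_finish(at);
    //         return Ok(mat);
    //     }
    //
    // to either an unconditional early return or to nothing at all, and
    // likewise prune the 'if let Some(ref pre) = pre' branches when 'None'
    // is passed, removing branches from code that runs on every match.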
    if pre.is_some() {
        if input.get_earliest() {
            find_fwd_imp(dfa, cache, input, pre, true)
        } else {
            find_fwd_imp(dfa, cache, input, pre, false)
        }
    } else {
        if input.get_earliest() {
            find_fwd_imp(dfa, cache, input, None, true)
        } else {
            find_fwd_imp(dfa, cache, input, None, false)
        }
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_fwd_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    pre: Option<&'_ Prefilter>,
    earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
    // See 'prefilter_restart' docs for explanation.
    let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
    let mut mat = None;
    let mut sid = init_fwd(dfa, cache, input)?;
    let mut at = input.start();
    // This could just be a closure, but then I think it would be unsound
    // because it would need to be safe to invoke. This way, the lack of
    // safety is clearer in the code below.
    macro_rules! next_unchecked {
        ($sid:expr, $at:expr) => {{
            let byte = *input.haystack().get_unchecked($at);
            dfa.next_state_untagged_unchecked(cache, $sid, byte)
        }};
    }

    if let Some(ref pre) = pre {
        let span = Span::from(at..input.end());
        match pre.find(input.haystack(), span) {
            None => return Ok(mat),
            Some(ref span) => {
                at = span.start;
                if !universal_start {
                    sid = prefilter_restart(dfa, cache, &input, at)?;
                }
            }
        }
    }
    cache.search_start(at);
    while at < input.end() {
        if sid.is_tagged() {
            cache.search_update(at);
            sid = dfa
                .next_state(cache, sid, input.haystack()[at])
                .map_err(|_| gave_up(at))?;
        } else {
            // SAFETY: There are two safety invariants we need to uphold
            // here in the loops below: that 'sid' and 'prev_sid' are valid
            // state IDs for this DFA, and that 'at' is a valid index into
            // 'haystack'. For the former, we rely on the invariant that
            // next_state* and start_state_forward always return a valid state
            // ID (given a valid state ID in the former case), and that we are
            // only at this place in the code if 'sid' is untagged. Moreover,
            // every call to next_state_untagged_unchecked below is guarded by
            // a check that sid is untagged. For the latter safety invariant,
            // we always guard unchecked access with a check that 'at' is less
            // than 'end', where 'end <= haystack.len()'. In the unrolled loop
            // below, we ensure that 'at' is always in bounds.
            //
            // PERF: As justification for omitting bounds checks: doing so
            // gives us a ~10% improvement in search time. The benchmark used
            // was:
            //
            //     regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
            //
            // PERF: As justification for the loop unrolling, we use a few
            // different tests:
            //
            //     regex-cli find half hybrid -p '\w{50}' -UBb bigfile
            //     regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
            //     regex-cli find half hybrid -p 'ZQZQZQZQ' -UBb bigfile
            //
            // And there are four different configurations:
            //
            //   nounroll: this entire 'else' block vanishes and we just
            //             always use 'dfa.next_state(..)'.
            //   unroll1:  just the outer loop below
            //   unroll2:  just the inner loop below
            //   unroll3:  both the outer and inner loops below
            //
            // This results in a matrix of timings for each of the above
            // regexes with each of the above unrolling configurations:
            //
            //              '\w{50}'   '(?m)^.+$'   'ZQZQZQZQ'
            //   nounroll    1.51s      2.34s        1.51s
            //   unroll1     1.53s      2.32s        1.56s
            //   unroll2     2.22s      1.50s        0.61s
            //   unroll3     1.67s      1.45s        0.61s
            //
            // Ideally we'd be able to find a configuration that yields the
            // best time for all regexes, but alas we settle for unroll3, which
            // gives us *almost* the best for '\w{50}' and the best for the
            // other two regexes.
            //
            // So what exactly is going on here? The first unrolling (grouping
            // together runs of untagged transitions) specifically targets
            // our choice of representation. The second unrolling (grouping
            // together runs of self-transitions) specifically targets a
            // common DFA topology. Let's dig in a little bit by looking at
            // our regexes:
            //
            // '\w{50}': This regex spends a lot of time outside of the DFA's
            // start state matching some part of the '\w' repetition. This
            // means that it's a bit of a worst case for loop unrolling that
            // targets self-transitions since the self-transitions in '\w{50}'
            // are not particularly active for this haystack. However, the
            // first unrolling (grouping together untagged transitions)
            // does apply quite well here since very few transitions hit
            // match/dead/quit/unknown states. It is however worth mentioning
            // that if start states are configured to be tagged (which you
            // typically want to do if you have a prefilter), then this regex
            // actually slows way down because it is constantly ping-ponging
            // out of the unrolled loop and into the handling of a tagged
            // start state below. But when start states aren't tagged, the
            // unrolled loop stays hot. (This is why it's imperative that
            // start state tagging be disabled when there isn't a prefilter!)
            //
            // '(?m)^.+$': There are two important aspects of this regex: 1)
            // on this haystack, its match count is very high, much higher
            // than the other two regexes, and 2) it spends the vast majority
            // of its time matching '.+'. Since Unicode mode is disabled,
            // this corresponds to repeatedly following self transitions for
            // the vast majority of the input. This does benefit from the
            // untagged unrolling since most of the transitions will be to
            // untagged states, but the untagged unrolling does more work than
            // what is actually required. Namely, it has to keep track of the
            // previous and next state IDs, which I guess requires a bit more
            // shuffling. This is supported by the fact that nounroll+unroll1
            // are both slower than unroll2+unroll3, where the latter has a
            // loop unrolling that specifically targets self-transitions.
            //
            // 'ZQZQZQZQ': This one is very similar to '(?m)^.+$' because it
            // spends the vast majority of its time in self-transitions for
            // the (implicit) unanchored prefix. The main difference with
            // '(?m)^.+$' is that it has a much lower match count. So there
            // isn't much time spent in the overhead of reporting matches.
            // This is the primary explainer of the perf difference here. We
            // include this regex and the former to make sure we have
            // comparison points with high and low match counts.
            //
            // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'.
            //
            // NOTE: In a follow-up, it turns out that the "inner" loop
            // mentioned above was a pretty big pessimization in some other
            // cases. Namely, it resulted in too much ping-ponging into and
            // out of the loop, which resulted in nearly 2x regressions in
            // search time when compared to the original lazy DFA in the
            // regex crate. So I've removed the second loop unrolling that
            // targets the self-transition case.
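            //
            // For reference, a self-transition unrolling of the kind
            // described above would look roughly like the following (an
            // illustrative sketch only, not the code that was removed):
            //
            //     while at < input.end()
            //         && sid == unsafe { next_unchecked!(sid, at) }
            //     {
            //         at += 1;
            //     }
            //
            // That is, it burns through runs of bytes that keep the DFA in
            // the same (untagged) state before falling back to the general
            // transition handling below.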
            let mut prev_sid = sid;
            while at < input.end() {
                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() || at + 3 >= input.end() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at += 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at += 1;

                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at += 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at += 1;
            }
            // If we quit out of the code above with an unknown state ID at
            // any point, then we need to re-compute that transition using
            // 'next_state', which will do NFA powerset construction for us.
            if sid.is_unknown() {
                cache.search_update(at);
                sid = dfa
                    .next_state(cache, prev_sid, input.haystack()[at])
                    .map_err(|_| gave_up(at))?;
            }
        }
        if sid.is_tagged() {
            if sid.is_start() {
                if let Some(ref pre) = pre {
                    let span = Span::from(at..input.end());
                    match pre.find(input.haystack(), span) {
                        None => {
                            cache.search_finish(span.end);
                            return Ok(mat);
                        }
                        Some(ref span) => {
                            // We want to skip any update to 'at' below
                            // at the end of this iteration and just
                            // jump immediately back to the next state
                            // transition at the leading position of the
                            // candidate match.
                            //
                            // ... but only if we actually made progress
                            // with our prefilter, otherwise if the start
                            // state has a self-loop, we can get stuck.
                            if span.start > at {
                                at = span.start;
                                if !universal_start {
                                    sid = prefilter_restart(
                                        dfa, cache, &input, at,
                                    )?;
                                }
                                continue;
                            }
                        }
                    }
                }
            } else if sid.is_match() {
                let pattern = dfa.match_pattern(cache, sid, 0);
                // Since slice ranges are inclusive at the beginning and
                // exclusive at the end, and since forward searches report
                // the end, we can return 'at' as-is. This only works because
                // matches are delayed by 1 byte. So by the time we observe a
                // match, 'at' has already been set to 1 byte past the actual
                // match location, which is precisely the exclusive ending
                // bound of the match.
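                //
                // As a concrete illustration (with a hypothetical pattern
                // and haystack): for the pattern 'abc' and the haystack
                // "zabcz", the match span is 1..4. Because of the 1 byte
                // delay, the DFA enters its match state on the transition
                // for the byte at offset 4 (the trailing 'z'), at which
                // point 'at == 4', which is exactly the exclusive end offset
                // reported below.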
                mat = Some(HalfMatch::new(pattern, at));
                if earliest {
                    cache.search_finish(at);
                    return Ok(mat);
                }
            } else if sid.is_dead() {
                cache.search_finish(at);
                return Ok(mat);
            } else if sid.is_quit() {
                cache.search_finish(at);
                return Err(MatchError::quit(input.haystack()[at], at));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        at += 1;
    }
    eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?;
    cache.search_finish(input.end());
    Ok(mat)
}

#[inline(never)]
pub(crate) fn find_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<Option<HalfMatch>, MatchError> {
    if input.is_done() {
        return Ok(None);
    }
    if input.get_earliest() {
        find_rev_imp(dfa, cache, input, true)
    } else {
        find_rev_imp(dfa, cache, input, false)
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_rev_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    earliest: bool,
) -> Result<Option<HalfMatch>, MatchError> {
    let mut mat = None;
    let mut sid = init_rev(dfa, cache, input)?;
    // In reverse search, the loop below can't handle the case of searching an
    // empty slice. Ideally we could write something congruent to the forward
    // search, i.e., 'while at >= start', but 'start' might be 0. Since we use
    // an unsigned offset, 'at >= 0' is trivially always true. We could avoid
    // this extra case handling by using a signed offset, but Rust makes it
    // annoying to do. So... We just handle the empty case separately.
    if input.start() == input.end() {
        eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
        return Ok(mat);
    }

    let mut at = input.end() - 1;
    macro_rules! next_unchecked {
        ($sid:expr, $at:expr) => {{
            let byte = *input.haystack().get_unchecked($at);
            dfa.next_state_untagged_unchecked(cache, $sid, byte)
        }};
    }
    cache.search_start(at);
    loop {
        if sid.is_tagged() {
            cache.search_update(at);
            sid = dfa
                .next_state(cache, sid, input.haystack()[at])
                .map_err(|_| gave_up(at))?;
        } else {
            // SAFETY: See comments in 'find_fwd' for a safety argument.
            //
            // PERF: The comments in 'find_fwd' also provide a justification
            // from a performance perspective as to 1) why we elide bounds
            // checks and 2) why we do a specialized version of unrolling
            // below. The reverse search does have a slightly different
            // consideration in that most reverse searches tend to be
            // anchored and on shorter haystacks. However, this still makes a
            // difference. Take this command for example:
            //
            //     regex-cli find match hybrid -p '(?m)^.+$' -UBb bigfile
            //
            // (Notice that we use 'find match hybrid', not 'find half
            // hybrid' like in the justification for the forward direction.
            // The 'match' sub-command will find start-of-match and thus run
            // the reverse direction.)
            //
            // Without unrolling below, the above command takes around 3.76s.
            // But with the unrolling below, we get down to 2.55s. If we keep
            // the unrolling but add in bounds checks, then we get 2.86s.
            //
            // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'.
            let mut prev_sid = sid;
            while at >= input.start() {
                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged()
                    || at <= input.start().saturating_add(3)
                {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at -= 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at -= 1;

                prev_sid = unsafe { next_unchecked!(sid, at) };
                if prev_sid.is_tagged() {
                    core::mem::swap(&mut prev_sid, &mut sid);
                    break;
                }
                at -= 1;

                sid = unsafe { next_unchecked!(prev_sid, at) };
                if sid.is_tagged() {
                    break;
                }
                at -= 1;
            }
            // If we quit out of the code above with an unknown state ID at
            // any point, then we need to re-compute that transition using
            // 'next_state', which will do NFA powerset construction for us.
            if sid.is_unknown() {
                cache.search_update(at);
                sid = dfa
                    .next_state(cache, prev_sid, input.haystack()[at])
                    .map_err(|_| gave_up(at))?;
            }
        }
        if sid.is_tagged() {
            if sid.is_start() {
                // do nothing
            } else if sid.is_match() {
                let pattern = dfa.match_pattern(cache, sid, 0);
                // Since matches are delayed by 1 byte in the reverse
                // direction as well, 'at' points at the byte immediately
                // before the true start of the match. Reverse searches
                // report the start of a match, which is an inclusive bound,
                // so 'at + 1' is exactly the offset we want.
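                //
                // As a concrete illustration (with a hypothetical pattern
                // and haystack): for the pattern 'abc' and the haystack
                // "zabcz", the match starts at offset 1. The reverse DFA
                // enters its match state on the transition for the byte at
                // offset 0 (the leading 'z'), so 'at == 0' here and
                // 'at + 1 == 1' is the inclusive start offset reported
                // below.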
                mat = Some(HalfMatch::new(pattern, at + 1));
                if earliest {
                    cache.search_finish(at);
                    return Ok(mat);
                }
            } else if sid.is_dead() {
                cache.search_finish(at);
                return Ok(mat);
            } else if sid.is_quit() {
                cache.search_finish(at);
                return Err(MatchError::quit(input.haystack()[at], at));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        if at == input.start() {
            break;
        }
        at -= 1;
    }
    cache.search_finish(input.start());
    eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
    Ok(mat)
}

#[inline(never)]
pub(crate) fn find_overlapping_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    state.mat = None;
    if input.is_done() {
        return Ok(());
    }
    let pre = if input.get_anchored().is_anchored() {
        None
    } else {
        dfa.get_config().get_prefilter()
    };
    if pre.is_some() {
        find_overlapping_fwd_imp(dfa, cache, input, pre, state)
    } else {
        find_overlapping_fwd_imp(dfa, cache, input, None, state)
    }
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn find_overlapping_fwd_imp(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    pre: Option<&'_ Prefilter>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    // See 'prefilter_restart' docs for explanation.
    let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
    let mut sid = match state.id {
        None => {
            state.at = input.start();
            init_fwd(dfa, cache, input)?
        }
        Some(sid) => {
            if let Some(match_index) = state.next_match_index {
                let match_len = dfa.match_len(cache, sid);
                if match_index < match_len {
                    state.next_match_index = Some(match_index + 1);
                    let pattern = dfa.match_pattern(cache, sid, match_index);
                    state.mat = Some(HalfMatch::new(pattern, state.at));
                    return Ok(());
                }
            }
            // Once we've reported all matches at a given position, we need to
            // advance the search to the next position.
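            //
            // For example (hypothetical patterns and haystack): with the
            // patterns 'sam' and 'am' and the haystack "samwise", both
            // patterns can have a match ending at offset 3, so the match
            // state reached there can report a match length of 2. The first
            // call reports one of those matches at offset 3, the resumed
            // call above reports the other at the same offset, and only then
            // do we fall through to here and advance 'state.at'.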
            state.at += 1;
            if state.at > input.end() {
                return Ok(());
            }
            sid
        }
    };

    // NOTE: We don't optimize the crap out of this routine primarily because
    // it seems like most overlapping searches will have higher match counts,
    // and thus, throughput is perhaps not as important. But if you have a use
    // case for something faster, feel free to file an issue.
    cache.search_start(state.at);
    while state.at < input.end() {
        sid = dfa
            .next_state(cache, sid, input.haystack()[state.at])
            .map_err(|_| gave_up(state.at))?;
        if sid.is_tagged() {
            state.id = Some(sid);
            if sid.is_start() {
                if let Some(ref pre) = pre {
                    let span = Span::from(state.at..input.end());
                    match pre.find(input.haystack(), span) {
                        None => return Ok(()),
                        Some(ref span) => {
                            if span.start > state.at {
                                state.at = span.start;
                                if !universal_start {
                                    sid = prefilter_restart(
                                        dfa, cache, &input, state.at,
                                    )?;
                                }
                                continue;
                            }
                        }
                    }
                }
            } else if sid.is_match() {
                state.next_match_index = Some(1);
                let pattern = dfa.match_pattern(cache, sid, 0);
                state.mat = Some(HalfMatch::new(pattern, state.at));
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_dead() {
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_quit() {
                cache.search_finish(state.at);
                return Err(MatchError::quit(
                    input.haystack()[state.at],
                    state.at,
                ));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        state.at += 1;
        cache.search_update(state.at);
    }

    let result = eoi_fwd(dfa, cache, input, &mut sid, &mut state.mat);
    state.id = Some(sid);
    if state.mat.is_some() {
        // '1' is always correct here since if we get to this point, this
        // always corresponds to the first (index '0') match discovered at
        // this position. So the next match to report at this position (if
        // it exists) is at index '1'.
        state.next_match_index = Some(1);
    }
    cache.search_finish(input.end());
    result
}

#[inline(never)]
pub(crate) fn find_overlapping_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    state: &mut OverlappingState,
) -> Result<(), MatchError> {
    state.mat = None;
    if input.is_done() {
        return Ok(());
    }
    let mut sid = match state.id {
        None => {
            let sid = init_rev(dfa, cache, input)?;
            state.id = Some(sid);
            if input.start() == input.end() {
                state.rev_eoi = true;
            } else {
                state.at = input.end() - 1;
            }
            sid
        }
        Some(sid) => {
            if let Some(match_index) = state.next_match_index {
                let match_len = dfa.match_len(cache, sid);
                if match_index < match_len {
                    state.next_match_index = Some(match_index + 1);
                    let pattern = dfa.match_pattern(cache, sid, match_index);
                    state.mat = Some(HalfMatch::new(pattern, state.at));
                    return Ok(());
                }
            }
            // Once we've reported all matches at a given position, we need
            // to advance the search to the next position. However, if we've
            // already followed the EOI transition, then we know we're done
            // with the search and there cannot be any more matches to report.
            if state.rev_eoi {
                return Ok(());
            } else if state.at == input.start() {
                // At this point, we should follow the EOI transition. This
                // will cause us to skip the main loop below and fall through
                // to the final 'eoi_rev' transition.
                state.rev_eoi = true;
            } else {
                // We haven't hit the end of the search yet, so move on.
                state.at -= 1;
            }
            sid
        }
    };
    cache.search_start(state.at);
    while !state.rev_eoi {
        sid = dfa
            .next_state(cache, sid, input.haystack()[state.at])
            .map_err(|_| gave_up(state.at))?;
        if sid.is_tagged() {
            state.id = Some(sid);
            if sid.is_start() {
                // do nothing
            } else if sid.is_match() {
                state.next_match_index = Some(1);
                let pattern = dfa.match_pattern(cache, sid, 0);
                state.mat = Some(HalfMatch::new(pattern, state.at + 1));
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_dead() {
                cache.search_finish(state.at);
                return Ok(());
            } else if sid.is_quit() {
                cache.search_finish(state.at);
                return Err(MatchError::quit(
                    input.haystack()[state.at],
                    state.at,
                ));
            } else {
                debug_assert!(sid.is_unknown());
                unreachable!("sid being unknown is a bug");
            }
        }
        if state.at == input.start() {
            break;
        }
        state.at -= 1;
        cache.search_update(state.at);
    }

    let result = eoi_rev(dfa, cache, input, &mut sid, &mut state.mat);
    state.rev_eoi = true;
    state.id = Some(sid);
    if state.mat.is_some() {
        // '1' is always correct here since if we get to this point, this
        // always corresponds to the first (index '0') match discovered at
        // this position. So the next match to report at this position (if
        // it exists) is at index '1'.
        state.next_match_index = Some(1);
    }
    cache.search_finish(input.start());
    result
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<LazyStateID, MatchError> {
    let sid = dfa.start_state_forward(cache, input)?;
    // Start states can never be match states, since all matches are delayed
    // by 1 byte.
    debug_assert!(!sid.is_match());
    Ok(sid)
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn init_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
) -> Result<LazyStateID, MatchError> {
    let sid = dfa.start_state_reverse(cache, input)?;
    // Start states can never be match states, since all matches are delayed
    // by 1 byte.
    debug_assert!(!sid.is_match());
    Ok(sid)
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_fwd(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    sid: &mut LazyStateID,
    mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
    let sp = input.get_span();
    match input.haystack().get(sp.end) {
        Some(&b) => {
            *sid =
                dfa.next_state(cache, *sid, b).map_err(|_| gave_up(sp.end))?;
            if sid.is_match() {
                let pattern = dfa.match_pattern(cache, *sid, 0);
                *mat = Some(HalfMatch::new(pattern, sp.end));
            } else if sid.is_quit() {
                return Err(MatchError::quit(b, sp.end));
            }
        }
        None => {
            *sid = dfa
                .next_eoi_state(cache, *sid)
                .map_err(|_| gave_up(input.haystack().len()))?;
            if sid.is_match() {
                let pattern = dfa.match_pattern(cache, *sid, 0);
                *mat = Some(HalfMatch::new(pattern, input.haystack().len()));
            }
            // N.B. We don't have to check 'is_quit' here because the EOI
            // transition can never lead to a quit state.
            debug_assert!(!sid.is_quit());
        }
    }
    Ok(())
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn eoi_rev(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    sid: &mut LazyStateID,
    mat: &mut Option<HalfMatch>,
) -> Result<(), MatchError> {
    let sp = input.get_span();
    if sp.start > 0 {
        let byte = input.haystack()[sp.start - 1];
        *sid = dfa
            .next_state(cache, *sid, byte)
            .map_err(|_| gave_up(sp.start))?;
        if sid.is_match() {
            let pattern = dfa.match_pattern(cache, *sid, 0);
            *mat = Some(HalfMatch::new(pattern, sp.start));
        } else if sid.is_quit() {
            return Err(MatchError::quit(byte, sp.start - 1));
        }
    } else {
        *sid =
            dfa.next_eoi_state(cache, *sid).map_err(|_| gave_up(sp.start))?;
        if sid.is_match() {
            let pattern = dfa.match_pattern(cache, *sid, 0);
            *mat = Some(HalfMatch::new(pattern, 0));
        }
        // N.B. We don't have to check 'is_quit' here because the EOI
        // transition can never lead to a quit state.
        debug_assert!(!sid.is_quit());
    }
    Ok(())
}
/// Re-compute the starting state that a DFA should be in after finding a
/// prefilter candidate match at the position `at`.
///
/// It is always correct to call this, but not always necessary. Namely,
/// whenever the DFA has a universal start state, the DFA can remain in the
/// start state that it was in when it ran the prefilter. Why? Because in that
/// case, there is only one start state.
///
/// When does a DFA have a universal start state? In precisely the cases where
/// it has no look-around assertions in its prefix. So for example, `\bfoo`
/// does not have a universal start state because the start state depends on
/// whether the byte immediately before the start position is a word byte or
/// not. However, `foo\b` does have a universal start state because the word
/// boundary does not appear in the pattern's prefix.
///
/// So... most cases don't need this, but when a pattern doesn't have a
/// universal start state, then after a prefilter candidate has been found,
/// the current state *must* be re-litigated as if computing the start state
/// at the beginning of the search, because it might change. That is, not all
/// start states are created equal.
///
/// Why avoid it? Because while it's not super expensive, it isn't a trivial
/// operation to compute the start state. It is much better to avoid it and
/// just stay in the current state if you know it to be correct.
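///
/// A minimal sketch of the caller-side pattern (as it appears in
/// `find_fwd_imp` above):
///
/// ```text
/// // Computed once, before the search loop:
/// let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty();
/// // ...and then, after a prefilter candidate is found at 'at':
/// if !universal_start {
///     sid = prefilter_restart(dfa, cache, &input, at)?;
/// }
/// ```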
#[cfg_attr(feature = "perf-inline", inline(always))]
fn prefilter_restart(
    dfa: &DFA,
    cache: &mut Cache,
    input: &Input<'_>,
    at: usize,
) -> Result<LazyStateID, MatchError> {
    let mut input = input.clone();
    input.set_start(at);
    init_fwd(dfa, cache, &input)
}

/// A convenience routine for constructing a "gave up" match error.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn gave_up(offset: usize) -> MatchError {
    MatchError::gave_up(offset)
}