| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | use regex_automata::dfa::sparse::DFA; |
| 6 | use regex_automata::dfa::Automaton; |
| 7 | use regex_automata::util::id::StateID; |
| 8 | use writeable::Writeable; |
| 9 | |
| 10 | pub trait LazyAutomaton: Automaton { |
| 11 | // Like Automaton::find_earliest_fwd, but doesn't require a materialized string. |
| 12 | fn matches_earliest_fwd_lazy<S: Writeable + ?Sized>(&self, haystack: &S) -> bool; |
| 13 | } |
| 14 | |
| 15 | impl<T: AsRef<[u8]>> LazyAutomaton for DFA<T> { |
| 16 | fn matches_earliest_fwd_lazy<S: Writeable + ?Sized>(&self, haystack: &S) -> bool { |
| 17 | struct DFAStepper<'a> { |
| 18 | dfa: &'a DFA<&'a [u8]>, |
| 19 | state: StateID, |
| 20 | } |
| 21 | |
| 22 | impl core::fmt::Write for DFAStepper<'_> { |
| 23 | fn write_str(&mut self, s: &str) -> core::fmt::Result { |
| 24 | for &byte in s.as_bytes() { |
| 25 | self.state = self.dfa.next_state(self.state, byte); |
| 26 | if self.dfa.is_match_state(self.state) || self.dfa.is_dead_state(self.state) { |
| 27 | // We matched or are in a no-match-cycle, return early |
| 28 | return Err(core::fmt::Error); |
| 29 | } |
| 30 | } |
| 31 | Ok(()) |
| 32 | } |
| 33 | } |
| 34 | |
| 35 | let mut stepper = DFAStepper { |
| 36 | // If start == 0 the start state does not depend on the actual string, so |
| 37 | // we can just pass an empty slice. |
| 38 | state: self.start_state_forward(None, &[], 0, 0), |
| 39 | dfa: &self.as_ref(), |
| 40 | }; |
| 41 | |
| 42 | if haystack.write_to(&mut stepper).is_ok() { |
| 43 | stepper.state = self.next_eoi_state(stepper.state); |
| 44 | } |
| 45 | |
| 46 | self.is_match_state(stepper.state) |
| 47 | } |
| 48 | } |
| 49 | |
#[cfg(test)]
#[test]
fn test() {
    use crate::provider::SerdeDFA;
    use alloc::borrow::Cow;

    // Writeable whose `write_to` only returns normally if the sink rejects the
    // first chunk; reaching the line after the `?` panics.
    struct ExitEarlyTest;

    impl writeable::Writeable for ExitEarlyTest {
        fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
            sink.write_str("12")?;
            unreachable!()
        }
    }

    let matcher = SerdeDFA::new(Cow::Borrowed("11(000)*$")).unwrap();

    // The lazy matcher must agree with the eager search on the rendered string.
    for number in [1i32, 11, 110, 11000, 211000] {
        let rendered = number.write_to_string();
        let expected = matcher
            .deref()
            .find_earliest_fwd(rendered.as_bytes())
            .unwrap()
            .is_some();
        let actual = matcher.deref().matches_earliest_fwd_lazy(&number);
        assert_eq!(expected, actual);
    }

    // "12" can never match, so the sink has to abort the write and report no match.
    let early_exit_matched = matcher.deref().matches_earliest_fwd_lazy(&ExitEarlyTest);
    assert!(!early_exit_matched);
}
| 80 | |