1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use regex_automata::dfa::sparse::DFA; |
6 | use regex_automata::dfa::Automaton; |
7 | use regex_automata::util::id::StateID; |
8 | use writeable::Writeable; |
9 | |
10 | pub trait LazyAutomaton: Automaton { |
11 | // Like Automaton::find_earliest_fwd, but doesn't require a materialized string. |
12 | fn matches_earliest_fwd_lazy<S: Writeable + ?Sized>(&self, haystack: &S) -> bool; |
13 | } |
14 | |
15 | impl<T: AsRef<[u8]>> LazyAutomaton for DFA<T> { |
16 | fn matches_earliest_fwd_lazy<S: Writeable + ?Sized>(&self, haystack: &S) -> bool { |
17 | struct DFAStepper<'a> { |
18 | dfa: &'a DFA<&'a [u8]>, |
19 | state: StateID, |
20 | } |
21 | |
22 | impl core::fmt::Write for DFAStepper<'_> { |
23 | fn write_str(&mut self, s: &str) -> core::fmt::Result { |
24 | for &byte in s.as_bytes() { |
25 | self.state = self.dfa.next_state(self.state, byte); |
26 | if self.dfa.is_match_state(self.state) || self.dfa.is_dead_state(self.state) { |
27 | // We matched or are in a no-match-cycle, return early |
28 | return Err(core::fmt::Error); |
29 | } |
30 | } |
31 | Ok(()) |
32 | } |
33 | } |
34 | |
35 | let mut stepper = DFAStepper { |
36 | // If start == 0 the start state does not depend on the actual string, so |
37 | // we can just pass an empty slice. |
38 | state: self.start_state_forward(None, &[], 0, 0), |
39 | dfa: &self.as_ref(), |
40 | }; |
41 | |
42 | if haystack.write_to(&mut stepper).is_ok() { |
43 | stepper.state = self.next_eoi_state(stepper.state); |
44 | } |
45 | |
46 | self.is_match_state(stepper.state) |
47 | } |
48 | } |
49 | |
50 | #[cfg (test)] |
51 | #[test ] |
52 | fn test() { |
53 | use crate::provider::SerdeDFA; |
54 | use alloc::borrow::Cow; |
55 | |
56 | let matcher = SerdeDFA::new(Cow::Borrowed("11(000)*$" )).unwrap(); |
57 | |
58 | for writeable in [1i32, 11, 110, 11000, 211000] { |
59 | assert_eq!( |
60 | matcher |
61 | .deref() |
62 | .find_earliest_fwd(writeable.write_to_string().as_bytes()) |
63 | .unwrap() |
64 | .is_some(), |
65 | matcher.deref().matches_earliest_fwd_lazy(&writeable) |
66 | ); |
67 | } |
68 | |
69 | struct ExitEarlyTest; |
70 | |
71 | impl writeable::Writeable for ExitEarlyTest { |
72 | fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { |
73 | sink.write_str("12" )?; |
74 | unreachable!() |
75 | } |
76 | } |
77 | |
78 | assert!(!matcher.deref().matches_earliest_fwd_lazy(&ExitEarlyTest)); |
79 | } |
80 | |