1 | /// Represents the four possible starting configurations of a DFA search. |
2 | /// |
3 | /// The starting configuration is determined by inspecting the the beginning of |
4 | /// the haystack (up to 1 byte). Ultimately, this along with a pattern ID (if |
5 | /// specified) is what selects the start state to use in a DFA. |
6 | /// |
7 | /// In a DFA that doesn't have starting states for each pattern, then it will |
8 | /// have a maximum of four DFA start states. If the DFA was compiled with start |
9 | /// states for each pattern, then it will have a maximum of four DFA start |
10 | /// states for searching for any pattern, and then another maximum of four DFA |
11 | /// start states for executing an anchored search for each pattern. |
12 | /// |
13 | /// This ends up being represented as a table in the DFA (whether lazy or fully |
14 | /// built) where the stride of that table is 4, and each entry is an index into |
15 | /// the state transition table. Note though that multiple entries in the table |
16 | /// might point to the same state if the states would otherwise be equivalent. |
17 | /// (This is guaranteed by DFA minimization and may even be accomplished by |
18 | /// normal determinization, since it attempts to reuse equivalent states too.) |
19 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
20 | pub(crate) enum Start { |
21 | /// This occurs when the starting position is not any of the ones below. |
22 | NonWordByte = 0, |
23 | /// This occurs when the byte immediately preceding the start of the search |
24 | /// is an ASCII word byte. |
25 | WordByte = 1, |
26 | /// This occurs when the starting position of the search corresponds to the |
27 | /// beginning of the haystack. |
28 | Text = 2, |
29 | /// This occurs when the byte immediately preceding the start of the search |
30 | /// is a line terminator. Specifically, `\n`. |
31 | Line = 3, |
32 | } |
33 | |
34 | impl Start { |
35 | /// Return the starting state corresponding to the given integer. If no |
36 | /// starting state exists for the given integer, then None is returned. |
37 | pub(crate) fn from_usize(n: usize) -> Option<Start> { |
38 | match n { |
39 | 0 => Some(Start::NonWordByte), |
40 | 1 => Some(Start::WordByte), |
41 | 2 => Some(Start::Text), |
42 | 3 => Some(Start::Line), |
43 | _ => None, |
44 | } |
45 | } |
46 | |
47 | /// Returns the total number of starting state configurations. |
48 | pub(crate) fn count() -> usize { |
49 | 4 |
50 | } |
51 | |
52 | /// Returns the starting state configuration for the given search |
53 | /// parameters. If the given offset range is not valid, then this panics. |
54 | #[inline (always)] |
55 | pub(crate) fn from_position_fwd( |
56 | bytes: &[u8], |
57 | start: usize, |
58 | end: usize, |
59 | ) -> Start { |
60 | assert!( |
61 | bytes.get(start..end).is_some(), |
62 | " {}.. {} is invalid" , |
63 | start, |
64 | end |
65 | ); |
66 | if start == 0 { |
67 | Start::Text |
68 | } else if bytes[start - 1] == b' \n' { |
69 | Start::Line |
70 | } else if crate::util::is_word_byte(bytes[start - 1]) { |
71 | Start::WordByte |
72 | } else { |
73 | Start::NonWordByte |
74 | } |
75 | } |
76 | |
77 | /// Returns the starting state configuration for a reverse search with the |
78 | /// given search parameters. If the given offset range is not valid, then |
79 | /// this panics. |
80 | #[inline (always)] |
81 | pub(crate) fn from_position_rev( |
82 | bytes: &[u8], |
83 | start: usize, |
84 | end: usize, |
85 | ) -> Start { |
86 | assert!( |
87 | bytes.get(start..end).is_some(), |
88 | " {}.. {} is invalid" , |
89 | start, |
90 | end |
91 | ); |
92 | if end == bytes.len() { |
93 | Start::Text |
94 | } else if bytes[end] == b' \n' { |
95 | Start::Line |
96 | } else if crate::util::is_word_byte(bytes[end]) { |
97 | Start::WordByte |
98 | } else { |
99 | Start::NonWordByte |
100 | } |
101 | } |
102 | |
103 | /// Return this starting configuration as an integer. It is guaranteed to |
104 | /// be less than `Start::count()`. |
105 | #[inline (always)] |
106 | pub(crate) fn as_usize(&self) -> usize { |
107 | *self as usize |
108 | } |
109 | } |
110 | |