1/// Represents the four possible starting configurations of a DFA search.
2///
3/// The starting configuration is determined by inspecting the the beginning of
4/// the haystack (up to 1 byte). Ultimately, this along with a pattern ID (if
5/// specified) is what selects the start state to use in a DFA.
6///
7/// In a DFA that doesn't have starting states for each pattern, then it will
8/// have a maximum of four DFA start states. If the DFA was compiled with start
9/// states for each pattern, then it will have a maximum of four DFA start
10/// states for searching for any pattern, and then another maximum of four DFA
11/// start states for executing an anchored search for each pattern.
12///
13/// This ends up being represented as a table in the DFA (whether lazy or fully
14/// built) where the stride of that table is 4, and each entry is an index into
15/// the state transition table. Note though that multiple entries in the table
16/// might point to the same state if the states would otherwise be equivalent.
17/// (This is guaranteed by DFA minimization and may even be accomplished by
18/// normal determinization, since it attempts to reuse equivalent states too.)
19#[derive(Clone, Copy, Debug, Eq, PartialEq)]
20pub(crate) enum Start {
21 /// This occurs when the starting position is not any of the ones below.
22 NonWordByte = 0,
23 /// This occurs when the byte immediately preceding the start of the search
24 /// is an ASCII word byte.
25 WordByte = 1,
26 /// This occurs when the starting position of the search corresponds to the
27 /// beginning of the haystack.
28 Text = 2,
29 /// This occurs when the byte immediately preceding the start of the search
30 /// is a line terminator. Specifically, `\n`.
31 Line = 3,
32}
33
34impl Start {
35 /// Return the starting state corresponding to the given integer. If no
36 /// starting state exists for the given integer, then None is returned.
37 pub(crate) fn from_usize(n: usize) -> Option<Start> {
38 match n {
39 0 => Some(Start::NonWordByte),
40 1 => Some(Start::WordByte),
41 2 => Some(Start::Text),
42 3 => Some(Start::Line),
43 _ => None,
44 }
45 }
46
47 /// Returns the total number of starting state configurations.
48 pub(crate) fn count() -> usize {
49 4
50 }
51
52 /// Returns the starting state configuration for the given search
53 /// parameters. If the given offset range is not valid, then this panics.
54 #[inline(always)]
55 pub(crate) fn from_position_fwd(
56 bytes: &[u8],
57 start: usize,
58 end: usize,
59 ) -> Start {
60 assert!(
61 bytes.get(start..end).is_some(),
62 "{}..{} is invalid",
63 start,
64 end
65 );
66 if start == 0 {
67 Start::Text
68 } else if bytes[start - 1] == b'\n' {
69 Start::Line
70 } else if crate::util::is_word_byte(bytes[start - 1]) {
71 Start::WordByte
72 } else {
73 Start::NonWordByte
74 }
75 }
76
77 /// Returns the starting state configuration for a reverse search with the
78 /// given search parameters. If the given offset range is not valid, then
79 /// this panics.
80 #[inline(always)]
81 pub(crate) fn from_position_rev(
82 bytes: &[u8],
83 start: usize,
84 end: usize,
85 ) -> Start {
86 assert!(
87 bytes.get(start..end).is_some(),
88 "{}..{} is invalid",
89 start,
90 end
91 );
92 if end == bytes.len() {
93 Start::Text
94 } else if bytes[end] == b'\n' {
95 Start::Line
96 } else if crate::util::is_word_byte(bytes[end]) {
97 Start::WordByte
98 } else {
99 Start::NonWordByte
100 }
101 }
102
103 /// Return this starting configuration as an integer. It is guaranteed to
104 /// be less than `Start::count()`.
105 #[inline(always)]
106 pub(crate) fn as_usize(&self) -> usize {
107 *self as usize
108 }
109}
110