| 1 | /// Represents the four possible starting configurations of a DFA search. |
| 2 | /// |
| 3 | /// The starting configuration is determined by inspecting the the beginning of |
| 4 | /// the haystack (up to 1 byte). Ultimately, this along with a pattern ID (if |
| 5 | /// specified) is what selects the start state to use in a DFA. |
| 6 | /// |
| 7 | /// In a DFA that doesn't have starting states for each pattern, then it will |
| 8 | /// have a maximum of four DFA start states. If the DFA was compiled with start |
| 9 | /// states for each pattern, then it will have a maximum of four DFA start |
| 10 | /// states for searching for any pattern, and then another maximum of four DFA |
| 11 | /// start states for executing an anchored search for each pattern. |
| 12 | /// |
| 13 | /// This ends up being represented as a table in the DFA (whether lazy or fully |
| 14 | /// built) where the stride of that table is 4, and each entry is an index into |
| 15 | /// the state transition table. Note though that multiple entries in the table |
| 16 | /// might point to the same state if the states would otherwise be equivalent. |
| 17 | /// (This is guaranteed by DFA minimization and may even be accomplished by |
| 18 | /// normal determinization, since it attempts to reuse equivalent states too.) |
| 19 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
| 20 | pub(crate) enum Start { |
| 21 | /// This occurs when the starting position is not any of the ones below. |
| 22 | NonWordByte = 0, |
| 23 | /// This occurs when the byte immediately preceding the start of the search |
| 24 | /// is an ASCII word byte. |
| 25 | WordByte = 1, |
| 26 | /// This occurs when the starting position of the search corresponds to the |
| 27 | /// beginning of the haystack. |
| 28 | Text = 2, |
| 29 | /// This occurs when the byte immediately preceding the start of the search |
| 30 | /// is a line terminator. Specifically, `\n`. |
| 31 | Line = 3, |
| 32 | } |
| 33 | |
| 34 | impl Start { |
| 35 | /// Return the starting state corresponding to the given integer. If no |
| 36 | /// starting state exists for the given integer, then None is returned. |
| 37 | pub(crate) fn from_usize(n: usize) -> Option<Start> { |
| 38 | match n { |
| 39 | 0 => Some(Start::NonWordByte), |
| 40 | 1 => Some(Start::WordByte), |
| 41 | 2 => Some(Start::Text), |
| 42 | 3 => Some(Start::Line), |
| 43 | _ => None, |
| 44 | } |
| 45 | } |
| 46 | |
| 47 | /// Returns the total number of starting state configurations. |
| 48 | pub(crate) fn count() -> usize { |
| 49 | 4 |
| 50 | } |
| 51 | |
| 52 | /// Returns the starting state configuration for the given search |
| 53 | /// parameters. If the given offset range is not valid, then this panics. |
| 54 | #[inline (always)] |
| 55 | pub(crate) fn from_position_fwd( |
| 56 | bytes: &[u8], |
| 57 | start: usize, |
| 58 | end: usize, |
| 59 | ) -> Start { |
| 60 | assert!( |
| 61 | bytes.get(start..end).is_some(), |
| 62 | " {}.. {} is invalid" , |
| 63 | start, |
| 64 | end |
| 65 | ); |
| 66 | if start == 0 { |
| 67 | Start::Text |
| 68 | } else if bytes[start - 1] == b' \n' { |
| 69 | Start::Line |
| 70 | } else if crate::util::is_word_byte(bytes[start - 1]) { |
| 71 | Start::WordByte |
| 72 | } else { |
| 73 | Start::NonWordByte |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | /// Returns the starting state configuration for a reverse search with the |
| 78 | /// given search parameters. If the given offset range is not valid, then |
| 79 | /// this panics. |
| 80 | #[inline (always)] |
| 81 | pub(crate) fn from_position_rev( |
| 82 | bytes: &[u8], |
| 83 | start: usize, |
| 84 | end: usize, |
| 85 | ) -> Start { |
| 86 | assert!( |
| 87 | bytes.get(start..end).is_some(), |
| 88 | " {}.. {} is invalid" , |
| 89 | start, |
| 90 | end |
| 91 | ); |
| 92 | if end == bytes.len() { |
| 93 | Start::Text |
| 94 | } else if bytes[end] == b' \n' { |
| 95 | Start::Line |
| 96 | } else if crate::util::is_word_byte(bytes[end]) { |
| 97 | Start::WordByte |
| 98 | } else { |
| 99 | Start::NonWordByte |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | /// Return this starting configuration as an integer. It is guaranteed to |
| 104 | /// be less than `Start::count()`. |
| 105 | #[inline (always)] |
| 106 | pub(crate) fn as_usize(&self) -> usize { |
| 107 | *self as usize |
| 108 | } |
| 109 | } |
| 110 | |