| 1 | use regex_syntax::{ast, hir}; |
| 2 | |
| 3 | use crate::{nfa, util::search::MatchError, PatternID}; |
| 4 | |
| 5 | /// An error that occurs when construction of a `Regex` fails. |
| 6 | /// |
| 7 | /// A build error is generally a result of one of two possible failure |
| 8 | /// modes. First is a parse or syntax error in the concrete syntax of a |
| 9 | /// pattern. Second is that the construction of the underlying regex matcher |
| 10 | /// fails, usually because it gets too big with respect to limits like |
| 11 | /// [`Config::nfa_size_limit`](crate::meta::Config::nfa_size_limit). |
| 12 | /// |
| 13 | /// This error provides very little introspection capabilities. You can: |
| 14 | /// |
| 15 | /// * Ask for the [`PatternID`] of the pattern that caused an error, if one |
| 16 | /// is available. This is available for things like syntax errors, but not for |
| 17 | /// cases where build limits are exceeded. |
| 18 | /// * Ask for the underlying syntax error, but only if the error is a syntax |
| 19 | /// error. |
| 20 | /// * Ask for a human readable message corresponding to the underlying error. |
| 21 | /// * The `BuildError::source` method (from the `std::error::Error` |
| 22 | /// trait implementation) may be used to query for an underlying error if one |
| 23 | /// exists. There are no API guarantees about which error is returned. |
| 24 | /// |
| 25 | /// When the `std` feature is enabled, this implements `std::error::Error`. |
| 26 | #[derive (Clone, Debug)] |
| 27 | pub struct BuildError { |
| 28 | kind: BuildErrorKind, |
| 29 | } |
| 30 | |
| 31 | #[derive (Clone, Debug)] |
| 32 | enum BuildErrorKind { |
| 33 | Syntax { pid: PatternID, err: regex_syntax::Error }, |
| 34 | NFA(nfa::thompson::BuildError), |
| 35 | } |
| 36 | |
| 37 | impl BuildError { |
| 38 | /// If it is known which pattern ID caused this build error to occur, then |
| 39 | /// this method returns it. |
| 40 | /// |
| 41 | /// Some errors are not associated with a particular pattern. However, any |
| 42 | /// errors that occur as part of parsing a pattern are guaranteed to be |
| 43 | /// associated with a pattern ID. |
| 44 | /// |
| 45 | /// # Example |
| 46 | /// |
| 47 | /// ``` |
| 48 | /// use regex_automata::{meta::Regex, PatternID}; |
| 49 | /// |
| 50 | /// let err = Regex::new_many(&["a" , "b" , r"\p{Foo}" , "c" ]).unwrap_err(); |
| 51 | /// assert_eq!(Some(PatternID::must(2)), err.pattern()); |
| 52 | /// ``` |
| 53 | pub fn pattern(&self) -> Option<PatternID> { |
| 54 | match self.kind { |
| 55 | BuildErrorKind::Syntax { pid, .. } => Some(pid), |
| 56 | _ => None, |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | /// If this error occurred because the regex exceeded the configured size |
| 61 | /// limit before being built, then this returns the configured size limit. |
| 62 | /// |
| 63 | /// The limit returned is what was configured, and corresponds to the |
| 64 | /// maximum amount of heap usage in bytes. |
| 65 | pub fn size_limit(&self) -> Option<usize> { |
| 66 | match self.kind { |
| 67 | BuildErrorKind::NFA(ref err) => err.size_limit(), |
| 68 | _ => None, |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | /// If this error corresponds to a syntax error, then a reference to it is |
| 73 | /// returned by this method. |
| 74 | pub fn syntax_error(&self) -> Option<®ex_syntax::Error> { |
| 75 | match self.kind { |
| 76 | BuildErrorKind::Syntax { ref err, .. } => Some(err), |
| 77 | _ => None, |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | pub(crate) fn ast(pid: PatternID, err: ast::Error) -> BuildError { |
| 82 | let err = regex_syntax::Error::from(err); |
| 83 | BuildError { kind: BuildErrorKind::Syntax { pid, err } } |
| 84 | } |
| 85 | |
| 86 | pub(crate) fn hir(pid: PatternID, err: hir::Error) -> BuildError { |
| 87 | let err = regex_syntax::Error::from(err); |
| 88 | BuildError { kind: BuildErrorKind::Syntax { pid, err } } |
| 89 | } |
| 90 | |
| 91 | pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError { |
| 92 | BuildError { kind: BuildErrorKind::NFA(err) } |
| 93 | } |
| 94 | } |
| 95 | |
#[cfg(feature = "std")]
impl std::error::Error for BuildError {
    /// Returns the lower-level error wrapped by this build error: the
    /// `regex-syntax` error for syntax failures, or the NFA build error
    /// otherwise.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self.kind {
            BuildErrorKind::Syntax { ref err, .. } => Some(err),
            BuildErrorKind::NFA(ref err) => Some(err),
        }
    }
}
| 105 | |
| 106 | impl core::fmt::Display for BuildError { |
| 107 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| 108 | match self.kind { |
| 109 | BuildErrorKind::Syntax { pid: PatternID, .. } => { |
| 110 | write!(f, "error parsing pattern {}" , pid.as_usize()) |
| 111 | } |
| 112 | BuildErrorKind::NFA(_) => write!(f, "error building NFA" ), |
| 113 | } |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | /// An error that occurs when a search should be retried. |
| 118 | /// |
| 119 | /// This retry error distinguishes between two different failure modes. |
| 120 | /// |
| 121 | /// The first is one where potential quadratic behavior has been detected. |
| 122 | /// In this case, whatever optimization that led to this behavior should be |
| 123 | /// stopped, and the next best strategy should be used. |
| 124 | /// |
| 125 | /// The second indicates that the underlying regex engine has failed for some |
| 126 | /// reason. This usually occurs because either a lazy DFA's cache has become |
| 127 | /// ineffective or because a non-ASCII byte has been seen *and* a Unicode word |
| 128 | /// boundary was used in one of the patterns. In this failure case, a different |
| 129 | /// regex engine that won't fail in these ways (PikeVM, backtracker or the |
| 130 | /// one-pass DFA) should be used. |
| 131 | /// |
| 132 | /// This is an internal error only and should never bleed into the public |
| 133 | /// API. |
| 134 | #[derive (Debug)] |
| 135 | pub(crate) enum RetryError { |
| 136 | Quadratic(RetryQuadraticError), |
| 137 | Fail(RetryFailError), |
| 138 | } |
| 139 | |
| 140 | #[cfg (feature = "std" )] |
| 141 | impl std::error::Error for RetryError {} |
| 142 | |
| 143 | impl core::fmt::Display for RetryError { |
| 144 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| 145 | match *self { |
| 146 | RetryError::Quadratic(ref err: &RetryQuadraticError) => err.fmt(f), |
| 147 | RetryError::Fail(ref err: &RetryFailError) => err.fmt(f), |
| 148 | } |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | impl From<MatchError> for RetryError { |
| 153 | fn from(merr: MatchError) -> RetryError { |
| 154 | RetryError::Fail(RetryFailError::from(merr)) |
| 155 | } |
| 156 | } |
| 157 | |
/// Signals that potential quadratic behavior has been detected while
/// applying either the "reverse suffix" or "reverse inner" optimizations.
///
/// Callers that see this error should abandon the "reverse" optimization
/// and use a normal forward search.
#[derive(Debug)]
pub(crate) struct RetryQuadraticError(());

impl RetryQuadraticError {
    /// Creates a new quadratic-behavior error. It carries no data.
    pub(crate) fn new() -> Self {
        Self(())
    }
}

#[cfg(feature = "std")]
impl std::error::Error for RetryQuadraticError {}

impl core::fmt::Display for RetryQuadraticError {
    /// Writes a fixed message; there is no per-instance state to report.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("regex engine gave up to avoid quadratic behavior")
    }
}
| 180 | |
| 181 | impl From<RetryQuadraticError> for RetryError { |
| 182 | fn from(err: RetryQuadraticError) -> RetryError { |
| 183 | RetryError::Quadratic(err) |
| 184 | } |
| 185 | } |
| 186 | |
/// Signals that a regex engine "gave up" for some reason before finishing a
/// search. This usually happens because of heuristic Unicode word boundary
/// support or because of ineffective cache usage in the lazy DFA.
///
/// Callers that see this error should retry the regex search with a
/// different regex engine.
///
/// Note that this has convenient `From` impls that will automatically
/// convert a `MatchError` into this error. This works because the meta
/// regex engine internals guarantee that errors like `HaystackTooLong` and
/// `UnsupportedAnchored` will never occur. The only errors left are `Quit`
/// and `GaveUp`, which both correspond to this "failure" error.
#[derive(Debug)]
pub(crate) struct RetryFailError {
    /// The offset reported alongside the failure.
    offset: usize,
}

impl RetryFailError {
    /// Creates a failure error that records the given offset.
    pub(crate) fn from_offset(offset: usize) -> Self {
        Self { offset }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for RetryFailError {}

impl core::fmt::Display for RetryFailError {
    /// Writes a message that includes the recorded offset.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let RetryFailError { offset } = self;
        write!(f, "regex engine failed at offset {:?}", offset)
    }
}
| 218 | |
| 219 | impl From<RetryFailError> for RetryError { |
| 220 | fn from(err: RetryFailError) -> RetryError { |
| 221 | RetryError::Fail(err) |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | impl From<MatchError> for RetryFailError { |
| 226 | fn from(merr: MatchError) -> RetryFailError { |
| 227 | use crate::util::search::MatchErrorKind::*; |
| 228 | |
| 229 | match *merr.kind() { |
| 230 | Quit { offset: usize, .. } => RetryFailError::from_offset(offset), |
| 231 | GaveUp { offset: usize } => RetryFailError::from_offset(offset), |
| 232 | // These can never occur because we avoid them by construction |
| 233 | // or with higher level control flow logic. For example, the |
| 234 | // backtracker's wrapper will never hand out a backtracker engine |
| 235 | // when the haystack would be too long. |
| 236 | HaystackTooLong { .. } | UnsupportedAnchored { .. } => { |
| 237 | unreachable!("found impossible error in meta engine: {}" , merr) |
| 238 | } |
| 239 | } |
| 240 | } |
| 241 | } |
| 242 | |