1 | use regex_syntax::{ast, hir}; |
2 | |
3 | use crate::{nfa, util::search::MatchError, PatternID}; |
4 | |
5 | /// An error that occurs when construction of a `Regex` fails. |
6 | /// |
7 | /// A build error is generally a result of one of two possible failure |
8 | /// modes. First is a parse or syntax error in the concrete syntax of a |
9 | /// pattern. Second is that the construction of the underlying regex matcher |
10 | /// fails, usually because it gets too big with respect to limits like |
11 | /// [`Config::nfa_size_limit`](crate::meta::Config::nfa_size_limit). |
12 | /// |
13 | /// This error provides very little introspection capabilities. You can: |
14 | /// |
15 | /// * Ask for the [`PatternID`] of the pattern that caused an error, if one |
16 | /// is available. This is available for things like syntax errors, but not for |
17 | /// cases where build limits are exceeded. |
18 | /// * Ask for the underlying syntax error, but only if the error is a syntax |
19 | /// error. |
20 | /// * Ask for a human readable message corresponding to the underlying error. |
21 | /// * The `BuildError::source` method (from the `std::error::Error` |
22 | /// trait implementation) may be used to query for an underlying error if one |
23 | /// exists. There are no API guarantees about which error is returned. |
24 | /// |
25 | /// When the `std` feature is enabled, this implements `std::error::Error`. |
26 | #[derive(Clone, Debug)] |
27 | pub struct BuildError { |
28 | kind: BuildErrorKind, |
29 | } |
30 | |
31 | #[derive(Clone, Debug)] |
32 | enum BuildErrorKind { |
33 | Syntax { pid: PatternID, err: regex_syntax::Error }, |
34 | NFA(nfa::thompson::BuildError), |
35 | } |
36 | |
37 | impl BuildError { |
38 | /// If it is known which pattern ID caused this build error to occur, then |
39 | /// this method returns it. |
40 | /// |
41 | /// Some errors are not associated with a particular pattern. However, any |
42 | /// errors that occur as part of parsing a pattern are guaranteed to be |
43 | /// associated with a pattern ID. |
44 | /// |
45 | /// # Example |
46 | /// |
47 | /// ``` |
48 | /// use regex_automata::{meta::Regex, PatternID}; |
49 | /// |
50 | /// let err = Regex::new_many(&["a" , "b" , r"\p{Foo}" , "c" ]).unwrap_err(); |
51 | /// assert_eq!(Some(PatternID::must(2)), err.pattern()); |
52 | /// ``` |
53 | pub fn pattern(&self) -> Option<PatternID> { |
54 | match self.kind { |
55 | BuildErrorKind::Syntax { pid, .. } => Some(pid), |
56 | _ => None, |
57 | } |
58 | } |
59 | |
60 | /// If this error occurred because the regex exceeded the configured size |
61 | /// limit before being built, then this returns the configured size limit. |
62 | /// |
63 | /// The limit returned is what was configured, and corresponds to the |
64 | /// maximum amount of heap usage in bytes. |
65 | pub fn size_limit(&self) -> Option<usize> { |
66 | match self.kind { |
67 | BuildErrorKind::NFA(ref err) => err.size_limit(), |
68 | _ => None, |
69 | } |
70 | } |
71 | |
72 | /// If this error corresponds to a syntax error, then a reference to it is |
73 | /// returned by this method. |
74 | pub fn syntax_error(&self) -> Option<®ex_syntax::Error> { |
75 | match self.kind { |
76 | BuildErrorKind::Syntax { ref err, .. } => Some(err), |
77 | _ => None, |
78 | } |
79 | } |
80 | |
81 | pub(crate) fn ast(pid: PatternID, err: ast::Error) -> BuildError { |
82 | let err = regex_syntax::Error::from(err); |
83 | BuildError { kind: BuildErrorKind::Syntax { pid, err } } |
84 | } |
85 | |
86 | pub(crate) fn hir(pid: PatternID, err: hir::Error) -> BuildError { |
87 | let err = regex_syntax::Error::from(err); |
88 | BuildError { kind: BuildErrorKind::Syntax { pid, err } } |
89 | } |
90 | |
91 | pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError { |
92 | BuildError { kind: BuildErrorKind::NFA(err) } |
93 | } |
94 | } |
95 | |
#[cfg(feature = "std")]
impl std::error::Error for BuildError {
    /// Returns the lower-level error wrapped by this build error. Every
    /// kind currently wraps one, so this never returns `None`.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match &self.kind {
            BuildErrorKind::Syntax { err, .. } => err,
            BuildErrorKind::NFA(err) => err,
        };
        Some(cause)
    }
}
105 | |
106 | impl core::fmt::Display for BuildError { |
107 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
108 | match self.kind { |
109 | BuildErrorKind::Syntax { pid, .. } => { |
110 | write!(f, "error parsing pattern {}" , pid.as_usize()) |
111 | } |
112 | BuildErrorKind::NFA(_) => write!(f, "error building NFA" ), |
113 | } |
114 | } |
115 | } |
116 | |
117 | /// An error that occurs when a search should be retried. |
118 | /// |
119 | /// This retry error distinguishes between two different failure modes. |
120 | /// |
121 | /// The first is one where potential quadratic behavior has been detected. |
122 | /// In this case, whatever optimization that led to this behavior should be |
123 | /// stopped, and the next best strategy should be used. |
124 | /// |
125 | /// The second indicates that the underlying regex engine has failed for some |
126 | /// reason. This usually occurs because either a lazy DFA's cache has become |
127 | /// ineffective or because a non-ASCII byte has been seen *and* a Unicode word |
128 | /// boundary was used in one of the patterns. In this failure case, a different |
129 | /// regex engine that won't fail in these ways (PikeVM, backtracker or the |
130 | /// one-pass DFA) should be used. |
131 | /// |
132 | /// This is an internal error only and should never bleed into the public |
133 | /// API. |
134 | #[derive(Debug)] |
135 | pub(crate) enum RetryError { |
136 | Quadratic(RetryQuadraticError), |
137 | Fail(RetryFailError), |
138 | } |
139 | |
140 | #[cfg (feature = "std" )] |
141 | impl std::error::Error for RetryError {} |
142 | |
143 | impl core::fmt::Display for RetryError { |
144 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
145 | match *self { |
146 | RetryError::Quadratic(ref err) => err.fmt(f), |
147 | RetryError::Fail(ref err) => err.fmt(f), |
148 | } |
149 | } |
150 | } |
151 | |
152 | impl From<MatchError> for RetryError { |
153 | fn from(merr: MatchError) -> RetryError { |
154 | RetryError::Fail(RetryFailError::from(merr)) |
155 | } |
156 | } |
157 | |
/// The error reported when potential quadratic behavior has been detected
/// while applying either the "reverse suffix" or "reverse inner"
/// optimizations.
///
/// Callers that see this error should abandon the "reverse" optimization
/// and use a normal forward search.
#[derive(Debug)]
pub(crate) struct RetryQuadraticError(());

impl RetryQuadraticError {
    /// Creates a new quadratic-behavior error. It carries no data.
    pub(crate) fn new() -> RetryQuadraticError {
        Self(())
    }
}

#[cfg(feature = "std")]
impl std::error::Error for RetryQuadraticError {}

impl core::fmt::Display for RetryQuadraticError {
    /// Writes a fixed, human readable message.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("regex engine gave up to avoid quadratic behavior")
    }
}
180 | |
181 | impl From<RetryQuadraticError> for RetryError { |
182 | fn from(err: RetryQuadraticError) -> RetryError { |
183 | RetryError::Quadratic(err) |
184 | } |
185 | } |
186 | |
/// The error reported when a regex engine "gives up" for some reason before
/// finishing a search. This usually happens because of heuristic Unicode
/// word boundary support or because of ineffective cache usage in the lazy
/// DFA.
///
/// Callers that see this error should retry the regex search with a
/// different regex engine.
///
/// Note that there are convenient `From` impls that will automatically
/// convert a `MatchError` into this error. This works because the meta
/// regex engine internals guarantee that errors like `HaystackTooLong` and
/// `UnsupportedAnchored` will never occur. The only errors left are `Quit`
/// and `GaveUp`, which both correspond to this "failure" error.
#[derive(Debug)]
pub(crate) struct RetryFailError {
    /// The offset reported with the failure.
    offset: usize,
}

impl RetryFailError {
    /// Creates a failure error that records the given offset.
    pub(crate) fn from_offset(offset: usize) -> RetryFailError {
        Self { offset }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for RetryFailError {}

impl core::fmt::Display for RetryFailError {
    /// Writes a human readable message that includes the recorded offset.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let at = self.offset;
        write!(f, "regex engine failed at offset {:?}", at)
    }
}
218 | |
219 | impl From<RetryFailError> for RetryError { |
220 | fn from(err: RetryFailError) -> RetryError { |
221 | RetryError::Fail(err) |
222 | } |
223 | } |
224 | |
225 | impl From<MatchError> for RetryFailError { |
226 | fn from(merr: MatchError) -> RetryFailError { |
227 | use crate::util::search::MatchErrorKind::*; |
228 | |
229 | match *merr.kind() { |
230 | Quit { offset, .. } => RetryFailError::from_offset(offset), |
231 | GaveUp { offset } => RetryFailError::from_offset(offset), |
232 | // These can never occur because we avoid them by construction |
233 | // or with higher level control flow logic. For example, the |
234 | // backtracker's wrapper will never hand out a backtracker engine |
235 | // when the haystack would be too long. |
236 | HaystackTooLong { .. } | UnsupportedAnchored { .. } => { |
237 | unreachable!("found impossible error in meta engine: {}" , merr) |
238 | } |
239 | } |
240 | } |
241 | } |
242 | |