1use regex_syntax::{ast, hir};
2
3use crate::{nfa, util::search::MatchError, PatternID};
4
5/// An error that occurs when construction of a `Regex` fails.
6///
7/// A build error is generally a result of one of two possible failure
8/// modes. First is a parse or syntax error in the concrete syntax of a
9/// pattern. Second is that the construction of the underlying regex matcher
10/// fails, usually because it gets too big with respect to limits like
11/// [`Config::nfa_size_limit`](crate::meta::Config::nfa_size_limit).
12///
13/// This error provides very little introspection capabilities. You can:
14///
15/// * Ask for the [`PatternID`] of the pattern that caused an error, if one
16/// is available. This is available for things like syntax errors, but not for
17/// cases where build limits are exceeded.
18/// * Ask for the underlying syntax error, but only if the error is a syntax
19/// error.
20/// * Ask for a human readable message corresponding to the underlying error.
21/// * The `BuildError::source` method (from the `std::error::Error`
22/// trait implementation) may be used to query for an underlying error if one
23/// exists. There are no API guarantees about which error is returned.
24///
25/// When the `std` feature is enabled, this implements `std::error::Error`.
26#[derive(Clone, Debug)]
27pub struct BuildError {
28 kind: BuildErrorKind,
29}
30
31#[derive(Clone, Debug)]
32enum BuildErrorKind {
33 Syntax { pid: PatternID, err: regex_syntax::Error },
34 NFA(nfa::thompson::BuildError),
35}
36
37impl BuildError {
38 /// If it is known which pattern ID caused this build error to occur, then
39 /// this method returns it.
40 ///
41 /// Some errors are not associated with a particular pattern. However, any
42 /// errors that occur as part of parsing a pattern are guaranteed to be
43 /// associated with a pattern ID.
44 ///
45 /// # Example
46 ///
47 /// ```
48 /// use regex_automata::{meta::Regex, PatternID};
49 ///
50 /// let err = Regex::new_many(&["a", "b", r"\p{Foo}", "c"]).unwrap_err();
51 /// assert_eq!(Some(PatternID::must(2)), err.pattern());
52 /// ```
53 pub fn pattern(&self) -> Option<PatternID> {
54 match self.kind {
55 BuildErrorKind::Syntax { pid, .. } => Some(pid),
56 _ => None,
57 }
58 }
59
60 /// If this error occurred because the regex exceeded the configured size
61 /// limit before being built, then this returns the configured size limit.
62 ///
63 /// The limit returned is what was configured, and corresponds to the
64 /// maximum amount of heap usage in bytes.
65 pub fn size_limit(&self) -> Option<usize> {
66 match self.kind {
67 BuildErrorKind::NFA(ref err) => err.size_limit(),
68 _ => None,
69 }
70 }
71
72 /// If this error corresponds to a syntax error, then a reference to it is
73 /// returned by this method.
74 pub fn syntax_error(&self) -> Option<&regex_syntax::Error> {
75 match self.kind {
76 BuildErrorKind::Syntax { ref err, .. } => Some(err),
77 _ => None,
78 }
79 }
80
81 pub(crate) fn ast(pid: PatternID, err: ast::Error) -> BuildError {
82 let err = regex_syntax::Error::from(err);
83 BuildError { kind: BuildErrorKind::Syntax { pid, err } }
84 }
85
86 pub(crate) fn hir(pid: PatternID, err: hir::Error) -> BuildError {
87 let err = regex_syntax::Error::from(err);
88 BuildError { kind: BuildErrorKind::Syntax { pid, err } }
89 }
90
91 pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError {
92 BuildError { kind: BuildErrorKind::NFA(err) }
93 }
94}
95
#[cfg(feature = "std")]
impl std::error::Error for BuildError {
    /// Returns the wrapped error: the syntax error for syntax failures, or
    /// the NFA build error otherwise. Both variants have an underlying
    /// error, so this never returns `None`.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match &self.kind {
            BuildErrorKind::Syntax { err, .. } => err,
            BuildErrorKind::NFA(err) => err,
        };
        Some(cause)
    }
}
105
106impl core::fmt::Display for BuildError {
107 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
108 match self.kind {
109 BuildErrorKind::Syntax { pid, .. } => {
110 write!(f, "error parsing pattern {}", pid.as_usize())
111 }
112 BuildErrorKind::NFA(_) => write!(f, "error building NFA"),
113 }
114 }
115}
116
117/// An error that occurs when a search should be retried.
118///
119/// This retry error distinguishes between two different failure modes.
120///
121/// The first is one where potential quadratic behavior has been detected.
122/// In this case, whatever optimization that led to this behavior should be
123/// stopped, and the next best strategy should be used.
124///
125/// The second indicates that the underlying regex engine has failed for some
126/// reason. This usually occurs because either a lazy DFA's cache has become
127/// ineffective or because a non-ASCII byte has been seen *and* a Unicode word
128/// boundary was used in one of the patterns. In this failure case, a different
129/// regex engine that won't fail in these ways (PikeVM, backtracker or the
130/// one-pass DFA) should be used.
131///
132/// This is an internal error only and should never bleed into the public
133/// API.
134#[derive(Debug)]
135pub(crate) enum RetryError {
136 Quadratic(RetryQuadraticError),
137 Fail(RetryFailError),
138}
139
// Marker impl only: no underlying source error is exposed. Compiled only
// when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for RetryError {}
142
143impl core::fmt::Display for RetryError {
144 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
145 match *self {
146 RetryError::Quadratic(ref err) => err.fmt(f),
147 RetryError::Fail(ref err) => err.fmt(f),
148 }
149 }
150}
151
152impl From<MatchError> for RetryError {
153 fn from(merr: MatchError) -> RetryError {
154 RetryError::Fail(RetryFailError::from(merr))
155 }
156}
157
/// The error reported when potential quadratic behavior has been detected
/// while applying either the "reverse suffix" or "reverse inner"
/// optimizations.
///
/// Callers that see this error should abandon the "reverse" optimization and
/// use a normal forward search instead.
///
/// The type carries no data; its only field is a private unit.
#[derive(Debug)]
pub(crate) struct RetryQuadraticError(());
165
166impl RetryQuadraticError {
167 pub(crate) fn new() -> RetryQuadraticError {
168 RetryQuadraticError(())
169 }
170}
171
// Marker impl only: there is no underlying source error. Compiled only when
// the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for RetryQuadraticError {}
174
175impl core::fmt::Display for RetryQuadraticError {
176 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
177 write!(f, "regex engine gave up to avoid quadratic behavior")
178 }
179}
180
181impl From<RetryQuadraticError> for RetryError {
182 fn from(err: RetryQuadraticError) -> RetryError {
183 RetryError::Quadratic(err)
184 }
185}
186
/// The error reported when a regex engine "gives up" for some reason before
/// finishing a search. This usually happens because of heuristic Unicode
/// word boundary support or because of ineffective cache usage in the lazy
/// DFA.
///
/// Callers that see this error should retry the regex search with a
/// different regex engine.
///
/// A `From` impl converts a `MatchError` into this error automatically. That
/// works because the meta regex engine internals guarantee that errors like
/// `HaystackTooLong` and `UnsupportedAnchored` will never occur. The only
/// errors left are `Quit` and `GaveUp`, which both correspond to this
/// "failure" error.
#[derive(Debug)]
pub(crate) struct RetryFailError {
    /// The offset reported by the failing engine.
    offset: usize,
}
203
204impl RetryFailError {
205 pub(crate) fn from_offset(offset: usize) -> RetryFailError {
206 RetryFailError { offset }
207 }
208}
209
// Marker impl only: there is no underlying source error. Compiled only when
// the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for RetryFailError {}
212
213impl core::fmt::Display for RetryFailError {
214 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
215 write!(f, "regex engine failed at offset {:?}", self.offset)
216 }
217}
218
219impl From<RetryFailError> for RetryError {
220 fn from(err: RetryFailError) -> RetryError {
221 RetryError::Fail(err)
222 }
223}
224
225impl From<MatchError> for RetryFailError {
226 fn from(merr: MatchError) -> RetryFailError {
227 use crate::util::search::MatchErrorKind::*;
228
229 match *merr.kind() {
230 Quit { offset, .. } => RetryFailError::from_offset(offset),
231 GaveUp { offset } => RetryFailError::from_offset(offset),
232 // These can never occur because we avoid them by construction
233 // or with higher level control flow logic. For example, the
234 // backtracker's wrapper will never hand out a backtracker engine
235 // when the haystack would be too long.
236 HaystackTooLong { .. } | UnsupportedAnchored { .. } => {
237 unreachable!("found impossible error in meta engine: {}", merr)
238 }
239 }
240 }
241}
242