1use std::cmp;
2use std::error;
3use std::fmt;
4use std::result;
5
6use crate::ast;
7use crate::hir;
8
/// A type alias for dealing with errors returned by this crate.
///
/// This is `std::result::Result` with its error parameter fixed to this
/// module's [`Error`] type.
pub type Result<T> = result::Result<T, Error>;
11
/// This error type encompasses any error that can be returned by this crate.
///
/// Each public variant wraps the error produced by one stage: parsing
/// (`ast::Error`) or translation (`hir::Error`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
    /// An error that occurred while translating concrete syntax into abstract
    /// syntax (AST).
    Parse(ast::Error),
    /// An error that occurred while translating abstract syntax into a high
    /// level intermediate representation (HIR).
    Translate(hir::Error),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// This enum may grow additional variants, so this makes sure clients
    /// don't count on exhaustive matching. (Otherwise, adding a new variant
    /// could break existing code.)
    // NOTE(review): the `#[non_exhaustive]` attribute expresses the same
    // intent, but swapping it in removes this variant and is therefore a
    // breaking change; consider it for the next breaking release.
    #[doc(hidden)]
    __Nonexhaustive,
}
29
30impl From<ast::Error> for Error {
31 fn from(err: ast::Error) -> Error {
32 Error::Parse(err)
33 }
34}
35
36impl From<hir::Error> for Error {
37 fn from(err: hir::Error) -> Error {
38 Error::Translate(err)
39 }
40}
41
42impl error::Error for Error {
43 // TODO: Remove this method entirely on the next breaking semver release.
44 #[allow(deprecated)]
45 fn description(&self) -> &str {
46 match *self {
47 Error::Parse(ref x) => x.description(),
48 Error::Translate(ref x) => x.description(),
49 _ => unreachable!(),
50 }
51 }
52}
53
54impl fmt::Display for Error {
55 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56 match *self {
57 Error::Parse(ref x) => x.fmt(f),
58 Error::Translate(ref x) => x.fmt(f),
59 _ => unreachable!(),
60 }
61 }
62}
63
/// A helper type for formatting nice error messages.
///
/// This type is responsible for reporting regex parse errors in a nice human
/// readable format. Most of its complexity is from interspersing notational
/// markers pointing out the position where an error occurred.
///
/// Every field borrows for the lifetime `'e`; within this file values are
/// built by the `From<&ast::Error>` and `From<&hir::Error>` conversions.
#[derive(Debug)]
pub struct Formatter<'e, E> {
    /// The original regex pattern in which the error occurred.
    pattern: &'e str,
    /// The error kind. It must impl fmt::Display.
    err: &'e E,
    /// The primary span of the error.
    span: &'e ast::Span,
    /// An auxiliary and optional span, in case the error needs to point to
    /// two locations (e.g., when reporting a duplicate capture group name).
    aux_span: Option<&'e ast::Span>,
}
81
82impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
83 fn from(err: &'e ast::Error) -> Self {
84 Formatter {
85 pattern: err.pattern(),
86 err: err.kind(),
87 span: err.span(),
88 aux_span: err.auxiliary_span(),
89 }
90 }
91}
92
93impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
94 fn from(err: &'e hir::Error) -> Self {
95 Formatter {
96 pattern: err.pattern(),
97 err: err.kind(),
98 span: err.span(),
99 aux_span: None,
100 }
101 }
102}
103
104impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
105 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106 let spans = Spans::from_formatter(self);
107 if self.pattern.contains('\n') {
108 let divider = repeat_char('~', 79);
109
110 writeln!(f, "regex parse error:")?;
111 writeln!(f, "{}", divider)?;
112 let notated = spans.notate();
113 write!(f, "{}", notated)?;
114 writeln!(f, "{}", divider)?;
115 // If we have error spans that cover multiple lines, then we just
116 // note the line numbers.
117 if !spans.multi_line.is_empty() {
118 let mut notes = vec![];
119 for span in &spans.multi_line {
120 notes.push(format!(
121 "on line {} (column {}) through line {} (column {})",
122 span.start.line,
123 span.start.column,
124 span.end.line,
125 span.end.column - 1
126 ));
127 }
128 writeln!(f, "{}", notes.join("\n"))?;
129 }
130 write!(f, "error: {}", self.err)?;
131 } else {
132 writeln!(f, "regex parse error:")?;
133 let notated = Spans::from_formatter(self).notate();
134 write!(f, "{}", notated)?;
135 write!(f, "error: {}", self.err)?;
136 }
137 Ok(())
138 }
139}
140
/// This type represents an arbitrary number of error spans in a way that makes
/// it convenient to notate the regex pattern. ("Notate" means "point out
/// exactly where the error occurred in the regex pattern.")
///
/// Technically, we can only ever have two spans given our current error
/// structure. However, after toiling with a specific algorithm for handling
/// two spans, it became obvious that an algorithm to handle an arbitrary
/// number of spans was actually much simpler.
struct Spans<'p> {
    /// The original regex pattern string.
    pattern: &'p str,
    /// The total width that should be used for line numbers. The width is
    /// used for left padding the line numbers for alignment.
    ///
    /// A value of `0` means line numbers should not be displayed. That is,
    /// the pattern is itself only one line.
    line_number_width: usize,
    /// All error spans that occur on a single line. The sequence is sized
    /// from the pattern's line count when the value is built, and the index
    /// of the sequence represents a line number, starting at `0`. The spans
    /// in each line are sorted in ascending order.
    by_line: Vec<Vec<ast::Span>>,
    /// All error spans that cover more than one line. That is, the start
    /// and end position of the span have different line numbers. The spans are
    /// sorted in ascending order.
    multi_line: Vec<ast::Span>,
}
168
169impl<'p> Spans<'p> {
170 /// Build a sequence of spans from a formatter.
171 fn from_formatter<'e, E: fmt::Display>(
172 fmter: &'p Formatter<'e, E>,
173 ) -> Spans<'p> {
174 let mut line_count = fmter.pattern.lines().count();
175 // If the pattern ends with a `\n` literal, then our line count is
176 // off by one, since a span can occur immediately after the last `\n`,
177 // which is consider to be an additional line.
178 if fmter.pattern.ends_with('\n') {
179 line_count += 1;
180 }
181 let line_number_width =
182 if line_count <= 1 { 0 } else { line_count.to_string().len() };
183 let mut spans = Spans {
184 pattern: &fmter.pattern,
185 line_number_width,
186 by_line: vec![vec![]; line_count],
187 multi_line: vec![],
188 };
189 spans.add(fmter.span.clone());
190 if let Some(span) = fmter.aux_span {
191 spans.add(span.clone());
192 }
193 spans
194 }
195
196 /// Add the given span to this sequence, putting it in the right place.
197 fn add(&mut self, span: ast::Span) {
198 // This is grossly inefficient since we sort after each add, but right
199 // now, we only ever add two spans at most.
200 if span.is_one_line() {
201 let i = span.start.line - 1; // because lines are 1-indexed
202 self.by_line[i].push(span);
203 self.by_line[i].sort();
204 } else {
205 self.multi_line.push(span);
206 self.multi_line.sort();
207 }
208 }
209
210 /// Notate the pattern string with carents (`^`) pointing at each span
211 /// location. This only applies to spans that occur within a single line.
212 fn notate(&self) -> String {
213 let mut notated = String::new();
214 for (i, line) in self.pattern.lines().enumerate() {
215 if self.line_number_width > 0 {
216 notated.push_str(&self.left_pad_line_number(i + 1));
217 notated.push_str(": ");
218 } else {
219 notated.push_str(" ");
220 }
221 notated.push_str(line);
222 notated.push('\n');
223 if let Some(notes) = self.notate_line(i) {
224 notated.push_str(¬es);
225 notated.push('\n');
226 }
227 }
228 notated
229 }
230
231 /// Return notes for the line indexed at `i` (zero-based). If there are no
232 /// spans for the given line, then `None` is returned. Otherwise, an
233 /// appropriately space padded string with correctly positioned `^` is
234 /// returned, accounting for line numbers.
235 fn notate_line(&self, i: usize) -> Option<String> {
236 let spans = &self.by_line[i];
237 if spans.is_empty() {
238 return None;
239 }
240 let mut notes = String::new();
241 for _ in 0..self.line_number_padding() {
242 notes.push(' ');
243 }
244 let mut pos = 0;
245 for span in spans {
246 for _ in pos..(span.start.column - 1) {
247 notes.push(' ');
248 pos += 1;
249 }
250 let note_len = span.end.column.saturating_sub(span.start.column);
251 for _ in 0..cmp::max(1, note_len) {
252 notes.push('^');
253 pos += 1;
254 }
255 }
256 Some(notes)
257 }
258
259 /// Left pad the given line number with spaces such that it is aligned with
260 /// other line numbers.
261 fn left_pad_line_number(&self, n: usize) -> String {
262 let n = n.to_string();
263 let pad = self.line_number_width.checked_sub(n.len()).unwrap();
264 let mut result = repeat_char(' ', pad);
265 result.push_str(&n);
266 result
267 }
268
269 /// Return the line number padding beginning at the start of each line of
270 /// the pattern.
271 ///
272 /// If the pattern is only one line, then this returns a fixed padding
273 /// for visual indentation.
274 fn line_number_padding(&self) -> usize {
275 if self.line_number_width == 0 {
276 4
277 } else {
278 2 + self.line_number_width
279 }
280 }
281}
282
/// Build a string made of `count` copies of `c`.
///
/// A `count` of `0` yields the empty string.
fn repeat_char(c: char, count: usize) -> String {
    (0..count).map(|_| c).collect()
}
286
#[cfg(test)]
mod tests {
    use crate::ast::parse::Parser;

    /// Parses `pattern`, which is expected to be rejected, and checks that
    /// the rendered error equals `expected_msg` with its surrounding
    /// whitespace trimmed.
    ///
    /// Despite the name, the only panic involved is this helper's own
    /// failure when the pattern unexpectedly parses.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        let result = Parser::new().parse(pattern);
        match result {
            Ok(_) => {
                panic!("regex should not have parsed");
            }
            Err(err) => {
                assert_eq!(err.to_string(), expected_msg.trim());
            }
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        // Single-line patterns are indented by the formatter's fixed
        // 4-column padding, and the caret row carries that same padding
        // plus the span's column offset.
        // NOTE(review): the caret is placed under the `[` (column 5),
        // presumably where the parser reports the span -- confirm against
        // the parser.
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}
325