1use alloc::{
2 format,
3 string::{String, ToString},
4 vec,
5 vec::Vec,
6};
7
8use crate::{ast, hir};
9
10/// This error type encompasses any error that can be returned by this crate.
11///
12/// This error type is marked as `non_exhaustive`. This means that adding a
13/// new variant is not considered a breaking change.
14#[non_exhaustive]
15#[derive(Clone, Debug, Eq, PartialEq)]
16pub enum Error {
17 /// An error that occurred while translating concrete syntax into abstract
18 /// syntax (AST).
19 Parse(ast::Error),
20 /// An error that occurred while translating abstract syntax into a high
21 /// level intermediate representation (HIR).
22 Translate(hir::Error),
23}
24
25impl From<ast::Error> for Error {
26 fn from(err: ast::Error) -> Error {
27 Error::Parse(err)
28 }
29}
30
31impl From<hir::Error> for Error {
32 fn from(err: hir::Error) -> Error {
33 Error::Translate(err)
34 }
35}
36
// The `std::error::Error` impl is only compiled when the `std` feature is
// enabled. All methods use their default implementations.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
39
40impl core::fmt::Display for Error {
41 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
42 match *self {
43 Error::Parse(ref x) => x.fmt(f),
44 Error::Translate(ref x) => x.fmt(f),
45 }
46 }
47}
48
49/// A helper type for formatting nice error messages.
50///
51/// This type is responsible for reporting regex parse errors in a nice human
52/// readable format. Most of its complexity is from interspersing notational
53/// markers pointing out the position where an error occurred.
54#[derive(Debug)]
55pub struct Formatter<'e, E> {
56 /// The original regex pattern in which the error occurred.
57 pattern: &'e str,
58 /// The error kind. It must impl fmt::Display.
59 err: &'e E,
60 /// The primary span of the error.
61 span: &'e ast::Span,
62 /// An auxiliary and optional span, in case the error needs to point to
63 /// two locations (e.g., when reporting a duplicate capture group name).
64 aux_span: Option<&'e ast::Span>,
65}
66
67impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
68 fn from(err: &'e ast::Error) -> Self {
69 Formatter {
70 pattern: err.pattern(),
71 err: err.kind(),
72 span: err.span(),
73 aux_span: err.auxiliary_span(),
74 }
75 }
76}
77
78impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
79 fn from(err: &'e hir::Error) -> Self {
80 Formatter {
81 pattern: err.pattern(),
82 err: err.kind(),
83 span: err.span(),
84 aux_span: None,
85 }
86 }
87}
88
89impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
90 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
91 let spans = Spans::from_formatter(self);
92 if self.pattern.contains('\n') {
93 let divider = repeat_char('~', 79);
94
95 writeln!(f, "regex parse error:")?;
96 writeln!(f, "{}", divider)?;
97 let notated = spans.notate();
98 write!(f, "{}", notated)?;
99 writeln!(f, "{}", divider)?;
100 // If we have error spans that cover multiple lines, then we just
101 // note the line numbers.
102 if !spans.multi_line.is_empty() {
103 let mut notes = vec![];
104 for span in &spans.multi_line {
105 notes.push(format!(
106 "on line {} (column {}) through line {} (column {})",
107 span.start.line,
108 span.start.column,
109 span.end.line,
110 span.end.column - 1
111 ));
112 }
113 writeln!(f, "{}", notes.join("\n"))?;
114 }
115 write!(f, "error: {}", self.err)?;
116 } else {
117 writeln!(f, "regex parse error:")?;
118 let notated = Spans::from_formatter(self).notate();
119 write!(f, "{}", notated)?;
120 write!(f, "error: {}", self.err)?;
121 }
122 Ok(())
123 }
124}
125
126/// This type represents an arbitrary number of error spans in a way that makes
127/// it convenient to notate the regex pattern. ("Notate" means "point out
128/// exactly where the error occurred in the regex pattern.")
129///
130/// Technically, we can only ever have two spans given our current error
131/// structure. However, after toiling with a specific algorithm for handling
132/// two spans, it became obvious that an algorithm to handle an arbitrary
133/// number of spans was actually much simpler.
134struct Spans<'p> {
135 /// The original regex pattern string.
136 pattern: &'p str,
137 /// The total width that should be used for line numbers. The width is
138 /// used for left padding the line numbers for alignment.
139 ///
140 /// A value of `0` means line numbers should not be displayed. That is,
141 /// the pattern is itself only one line.
142 line_number_width: usize,
143 /// All error spans that occur on a single line. This sequence always has
144 /// length equivalent to the number of lines in `pattern`, where the index
145 /// of the sequence represents a line number, starting at `0`. The spans
146 /// in each line are sorted in ascending order.
147 by_line: Vec<Vec<ast::Span>>,
148 /// All error spans that occur over one or more lines. That is, the start
149 /// and end position of the span have different line numbers. The spans are
150 /// sorted in ascending order.
151 multi_line: Vec<ast::Span>,
152}
153
154impl<'p> Spans<'p> {
155 /// Build a sequence of spans from a formatter.
156 fn from_formatter<'e, E: core::fmt::Display>(
157 fmter: &'p Formatter<'e, E>,
158 ) -> Spans<'p> {
159 let mut line_count = fmter.pattern.lines().count();
160 // If the pattern ends with a `\n` literal, then our line count is
161 // off by one, since a span can occur immediately after the last `\n`,
162 // which is consider to be an additional line.
163 if fmter.pattern.ends_with('\n') {
164 line_count += 1;
165 }
166 let line_number_width =
167 if line_count <= 1 { 0 } else { line_count.to_string().len() };
168 let mut spans = Spans {
169 pattern: &fmter.pattern,
170 line_number_width,
171 by_line: vec![vec![]; line_count],
172 multi_line: vec![],
173 };
174 spans.add(fmter.span.clone());
175 if let Some(span) = fmter.aux_span {
176 spans.add(span.clone());
177 }
178 spans
179 }
180
181 /// Add the given span to this sequence, putting it in the right place.
182 fn add(&mut self, span: ast::Span) {
183 // This is grossly inefficient since we sort after each add, but right
184 // now, we only ever add two spans at most.
185 if span.is_one_line() {
186 let i = span.start.line - 1; // because lines are 1-indexed
187 self.by_line[i].push(span);
188 self.by_line[i].sort();
189 } else {
190 self.multi_line.push(span);
191 self.multi_line.sort();
192 }
193 }
194
195 /// Notate the pattern string with carents (`^`) pointing at each span
196 /// location. This only applies to spans that occur within a single line.
197 fn notate(&self) -> String {
198 let mut notated = String::new();
199 for (i, line) in self.pattern.lines().enumerate() {
200 if self.line_number_width > 0 {
201 notated.push_str(&self.left_pad_line_number(i + 1));
202 notated.push_str(": ");
203 } else {
204 notated.push_str(" ");
205 }
206 notated.push_str(line);
207 notated.push('\n');
208 if let Some(notes) = self.notate_line(i) {
209 notated.push_str(¬es);
210 notated.push('\n');
211 }
212 }
213 notated
214 }
215
216 /// Return notes for the line indexed at `i` (zero-based). If there are no
217 /// spans for the given line, then `None` is returned. Otherwise, an
218 /// appropriately space padded string with correctly positioned `^` is
219 /// returned, accounting for line numbers.
220 fn notate_line(&self, i: usize) -> Option<String> {
221 let spans = &self.by_line[i];
222 if spans.is_empty() {
223 return None;
224 }
225 let mut notes = String::new();
226 for _ in 0..self.line_number_padding() {
227 notes.push(' ');
228 }
229 let mut pos = 0;
230 for span in spans {
231 for _ in pos..(span.start.column - 1) {
232 notes.push(' ');
233 pos += 1;
234 }
235 let note_len = span.end.column.saturating_sub(span.start.column);
236 for _ in 0..core::cmp::max(1, note_len) {
237 notes.push('^');
238 pos += 1;
239 }
240 }
241 Some(notes)
242 }
243
244 /// Left pad the given line number with spaces such that it is aligned with
245 /// other line numbers.
246 fn left_pad_line_number(&self, n: usize) -> String {
247 let n = n.to_string();
248 let pad = self.line_number_width.checked_sub(n.len()).unwrap();
249 let mut result = repeat_char(' ', pad);
250 result.push_str(&n);
251 result
252 }
253
254 /// Return the line number padding beginning at the start of each line of
255 /// the pattern.
256 ///
257 /// If the pattern is only one line, then this returns a fixed padding
258 /// for visual indentation.
259 fn line_number_padding(&self) -> usize {
260 if self.line_number_width == 0 {
261 4
262 } else {
263 2 + self.line_number_width
264 }
265 }
266}
267
/// Build a string consisting of `c` repeated `count` times.
///
/// A `count` of zero yields the empty string.
fn repeat_char(c: char, count: usize) -> String {
    (0..count).map(|_| c).collect()
}
271
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::ast::parse::Parser;

    /// Asserts that parsing `pattern` fails and that the rendered error is
    /// exactly `expected_msg` with its surrounding whitespace trimmed.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        match Parser::new().parse(pattern) {
            Ok(_) => panic!("regex should not have parsed"),
            Err(err) => assert_eq!(err.to_string(), expected_msg.trim()),
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    //
    // The expected report indents the pattern by the fixed single-line
    // padding (4 spaces). The caret line carries that padding plus one space
    // per column before the span.
    // NOTE(review): the caret is placed under the `[` (column 5), where the
    // parser is expected to report the missing decimal; confirm against the
    // parser.
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}
312