error.rs source code [crates/regex_syntax/src/error.rs]

1	use alloc::{
2	format,
3	string::{String, ToString},
4	vec,
5	vec::Vec,
6	};
7
8	use crate::{ast, hir};
9
10	/// This error type encompasses any error that can be returned by this crate.
11	///
12	/// This error type is marked as `non_exhaustive`. This means that adding a
13	/// new variant is not considered a breaking change.
14	#[non_exhaustive]
15	#[derive(Clone, Debug, Eq, PartialEq)]
16	pub enum Error {
17	/// An error that occurred while translating concrete syntax into abstract
18	/// syntax (AST).
19	Parse(ast::Error),
20	/// An error that occurred while translating abstract syntax into a high
21	/// level intermediate representation (HIR).
22	Translate(hir::Error),
23	}
24
25	impl From<ast::Error> for Error {
26	fn from(err: ast::Error) -> Error {
27	Error::Parse(err)
28	}
29	}
30
31	impl From<hir::Error> for Error {
32	fn from(err: hir::Error) -> Error {
33	Error::Translate(err)
34	}
35	}
36
// Only available when the `std` feature is enabled, since the
// `std::error::Error` trait is named through `std`. The impl is empty, so
// every trait method keeps its default implementation.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
39
40	impl core::fmt::Display for Error {
41	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
42	match *self {
43	Error::Parse(ref x: &Error) => x.fmt(f),
44	Error::Translate(ref x: &Error) => x.fmt(f),
45	}
46	}
47	}
48
49	/// A helper type for formatting nice error messages.
50	///
51	/// This type is responsible for reporting regex parse errors in a nice human
52	/// readable format. Most of its complexity is from interspersing notational
53	/// markers pointing out the position where an error occurred.
54	#[derive(Debug)]
55	pub struct Formatter<'e, E> {
56	/// The original regex pattern in which the error occurred.
57	pattern: &'e str,
58	/// The error kind. It must impl fmt::Display.
59	err: &'e E,
60	/// The primary span of the error.
61	span: &'e ast::Span,
62	/// An auxiliary and optional span, in case the error needs to point to
63	/// two locations (e.g., when reporting a duplicate capture group name).
64	aux_span: Option<&'e ast::Span>,
65	}
66
67	impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
68	fn from(err: &'e ast::Error) -> Self {
69	Formatter {
70	pattern: err.pattern(),
71	err: err.kind(),
72	span: err.span(),
73	aux_span: err.auxiliary_span(),
74	}
75	}
76	}
77
78	impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
79	fn from(err: &'e hir::Error) -> Self {
80	Formatter {
81	pattern: err.pattern(),
82	err: err.kind(),
83	span: err.span(),
84	aux_span: None,
85	}
86	}
87	}
88
89	impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
90	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
91	let spans = Spans::from_formatter(self);
92	if self.pattern.contains('`\n`') {
93	let divider = repeat_char('~', `79`);
94
95	writeln!(f, "regex parse error:")?;
96	writeln!(f, "{}", divider)?;
97	let notated = spans.notate();
98	write!(f, "{}", notated)?;
99	writeln!(f, "{}", divider)?;
100	// If we have error spans that cover multiple lines, then we just
101	// note the line numbers.
102	if !spans.multi_line.is_empty() {
103	let mut notes = vec![];
104	for span in &spans.multi_line {
105	notes.push(format!(
106	"on line {} (column {}) through line {} (column {})",
107	span.start.line,
108	span.start.column,
109	span.end.line,
110	span.end.column - `1`
111	));
112	}
113	writeln!(f, "{}", notes.join("`\n`"))?;
114	}
115	write!(f, "error: {}", self.err)?;
116	} else {
117	writeln!(f, "regex parse error:")?;
118	let notated = Spans::from_formatter(self).notate();
119	write!(f, "{}", notated)?;
120	write!(f, "error: {}", self.err)?;
121	}
122	Ok(())
123	}
124	}
125
126	/// This type represents an arbitrary number of error spans in a way that makes
127	/// it convenient to notate the regex pattern. ("Notate" means "point out
128	/// exactly where the error occurred in the regex pattern.")
129	///
130	/// Technically, we can only ever have two spans given our current error
131	/// structure. However, after toiling with a specific algorithm for handling
132	/// two spans, it became obvious that an algorithm to handle an arbitrary
133	/// number of spans was actually much simpler.
134	struct Spans<'p> {
135	/// The original regex pattern string.
136	pattern: &'p str,
137	/// The total width that should be used for line numbers. The width is
138	/// used for left padding the line numbers for alignment.
139	///
140	/// A value of `0` means line numbers should not be displayed. That is,
141	/// the pattern is itself only one line.
142	line_number_width: usize,
143	/// All error spans that occur on a single line. This sequence always has
144	/// length equivalent to the number of lines in `pattern`, where the index
145	/// of the sequence represents a line number, starting at `0`. The spans
146	/// in each line are sorted in ascending order.
147	by_line: Vec<Vec<ast::Span>>,
148	/// All error spans that occur over one or more lines. That is, the start
149	/// and end position of the span have different line numbers. The spans are
150	/// sorted in ascending order.
151	multi_line: Vec<ast::Span>,
152	}
153
154	impl<'p> Spans<'p> {
155	/// Build a sequence of spans from a formatter.
156	fn from_formatter<'e, E: core::fmt::Display>(
157	fmter: &'p Formatter<'e, E>,
158	) -> Spans<'p> {
159	let mut line_count = fmter.pattern.lines().count();
160	// If the pattern ends with a `\n` literal, then our line count is
161	// off by one, since a span can occur immediately after the last `\n`,
162	// which is consider to be an additional line.
163	if fmter.pattern.ends_with('`\n`') {
164	line_count += `1`;
165	}
166	let line_number_width =
167	if line_count <= `1` { `0` } else { line_count.to_string().len() };
168	let mut spans = Spans {
169	pattern: &fmter.pattern,
170	line_number_width,
171	by_line: vec![vec![]; line_count],
172	multi_line: vec![],
173	};
174	spans.add(fmter.span.clone());
175	if let Some(span) = fmter.aux_span {
176	spans.add(span.clone());
177	}
178	spans
179	}
180
181	/// Add the given span to this sequence, putting it in the right place.
182	fn add(&mut self, span: ast::Span) {
183	// This is grossly inefficient since we sort after each add, but right
184	// now, we only ever add two spans at most.
185	if span.is_one_line() {
186	let i = span.start.line - `1`; // because lines are 1-indexed
187	self.by_line[i].push(span);
188	self.by_line[i].sort();
189	} else {
190	self.multi_line.push(span);
191	self.multi_line.sort();
192	}
193	}
194
195	/// Notate the pattern string with carents (`^`) pointing at each span
196	/// location. This only applies to spans that occur within a single line.
197	fn notate(&self) -> String {
198	let mut notated = String::new();
199	for (i, line) in self.pattern.lines().enumerate() {
200	if self.line_number_width > `0` {
201	notated.push_str(&self.left_pad_line_number(i + `1`));
202	notated.push_str(": ");
203	} else {
204	notated.push_str(" ");
205	}
206	notated.push_str(line);
207	notated.push('`\n`');
208	if let Some(notes) = self.notate_line(i) {
209	notated.push_str(&notes);
210	notated.push('`\n`');
211	}
212	}
213	notated
214	}
215
216	/// Return notes for the line indexed at `i` (zero-based). If there are no
217	/// spans for the given line, then `None` is returned. Otherwise, an
218	/// appropriately space padded string with correctly positioned `^` is
219	/// returned, accounting for line numbers.
220	fn notate_line(&self, i: usize) -> Option<String> {
221	let spans = &self.by_line[i];
222	if spans.is_empty() {
223	return None;
224	}
225	let mut notes = String::new();
226	for _ in `0`..self.line_number_padding() {
227	notes.push(' ');
228	}
229	let mut pos = `0`;
230	for span in spans {
231	for _ in pos..(span.start.column - `1`) {
232	notes.push(' ');
233	pos += `1`;
234	}
235	let note_len = span.end.column.saturating_sub(span.start.column);
236	for _ in `0`..core::cmp::max(`1`, note_len) {
237	notes.push('^');
238	pos += `1`;
239	}
240	}
241	Some(notes)
242	}
243
244	/// Left pad the given line number with spaces such that it is aligned with
245	/// other line numbers.
246	fn left_pad_line_number(&self, n: usize) -> String {
247	let n = n.to_string();
248	let pad = self.line_number_width.checked_sub(n.len()).unwrap();
249	let mut result = repeat_char(' ', pad);
250	result.push_str(&n);
251	result
252	}
253
254	/// Return the line number padding beginning at the start of each line of
255	/// the pattern.
256	///
257	/// If the pattern is only one line, then this returns a fixed padding
258	/// for visual indentation.
259	fn line_number_padding(&self) -> usize {
260	if self.line_number_width == `0` {
261	`4`
262	} else {
263	`2` + self.line_number_width
264	}
265	}
266	}
267
/// Returns a string made of `count` copies of the character `c`. A `count`
/// of zero yields the empty string.
fn repeat_char(c: char, count: usize) -> String {
    core::iter::repeat(c).take(count).collect()
}
271
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::ast::parse::Parser;

    /// Parses `pattern`, which must fail, and checks that the rendered error
    /// equals `expected_msg` after trimming surrounding whitespace from the
    /// expectation.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        let result = Parser::new().parse(pattern);
        match result {
            Ok(_) => {
                panic!("regex should not have parsed");
            }
            Err(err) => {
                assert_eq!(err.to_string(), expected_msg.trim());
            }
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        // The raw string is deliberately not indented with the surrounding
        // code: its leading whitespace is part of the expected output (four
        // columns of indentation before the pattern, and the `^` underneath
        // the offending `{`).
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}
312