| 1 | use alloc::{ | 
| 2 | format, | 
|---|
| 3 | string::{String, ToString}, | 
|---|
| 4 | vec, | 
|---|
| 5 | vec::Vec, | 
|---|
| 6 | }; | 
|---|
| 7 |  | 
|---|
| 8 | use crate::{ast, hir}; | 
|---|
| 9 |  | 
|---|
| 10 | /// This error type encompasses any error that can be returned by this crate. | 
|---|
| 11 | /// | 
|---|
| 12 | /// This error type is marked as `non_exhaustive`. This means that adding a | 
|---|
| 13 | /// new variant is not considered a breaking change. | 
|---|
| 14 | #[ non_exhaustive] | 
|---|
| 15 | #[ derive(Clone, Debug, Eq, PartialEq)] | 
|---|
| 16 | pub enum Error { | 
|---|
| 17 | /// An error that occurred while translating concrete syntax into abstract | 
|---|
| 18 | /// syntax (AST). | 
|---|
| 19 | Parse(ast::Error), | 
|---|
| 20 | /// An error that occurred while translating abstract syntax into a high | 
|---|
| 21 | /// level intermediate representation (HIR). | 
|---|
| 22 | Translate(hir::Error), | 
|---|
| 23 | } | 
|---|
| 24 |  | 
|---|
| 25 | impl From<ast::Error> for Error { | 
|---|
| 26 | fn from(err: ast::Error) -> Error { | 
|---|
| 27 | Error::Parse(err) | 
|---|
| 28 | } | 
|---|
| 29 | } | 
|---|
| 30 |  | 
|---|
| 31 | impl From<hir::Error> for Error { | 
|---|
| 32 | fn from(err: hir::Error) -> Error { | 
|---|
| 33 | Error::Translate(err) | 
|---|
| 34 | } | 
|---|
| 35 | } | 
|---|
| 36 |  | 
|---|
// The `std::error::Error` trait is only reachable when the `std` feature is
// enabled, hence the gate. No trait methods are overridden.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
|---|
| 39 |  | 
|---|
| 40 | impl core::fmt::Display for Error { | 
|---|
| 41 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { | 
|---|
| 42 | match *self { | 
|---|
| 43 | Error::Parse(ref x: &Error) => x.fmt(f), | 
|---|
| 44 | Error::Translate(ref x: &Error) => x.fmt(f), | 
|---|
| 45 | } | 
|---|
| 46 | } | 
|---|
| 47 | } | 
|---|
| 48 |  | 
|---|
| 49 | /// A helper type for formatting nice error messages. | 
|---|
| 50 | /// | 
|---|
| 51 | /// This type is responsible for reporting regex parse errors in a nice human | 
|---|
| 52 | /// readable format. Most of its complexity is from interspersing notational | 
|---|
| 53 | /// markers pointing out the position where an error occurred. | 
|---|
| 54 | #[ derive(Debug)] | 
|---|
| 55 | pub struct Formatter<'e, E> { | 
|---|
| 56 | /// The original regex pattern in which the error occurred. | 
|---|
| 57 | pattern: &'e str, | 
|---|
| 58 | /// The error kind. It must impl fmt::Display. | 
|---|
| 59 | err: &'e E, | 
|---|
| 60 | /// The primary span of the error. | 
|---|
| 61 | span: &'e ast::Span, | 
|---|
| 62 | /// An auxiliary and optional span, in case the error needs to point to | 
|---|
| 63 | /// two locations (e.g., when reporting a duplicate capture group name). | 
|---|
| 64 | aux_span: Option<&'e ast::Span>, | 
|---|
| 65 | } | 
|---|
| 66 |  | 
|---|
| 67 | impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> { | 
|---|
| 68 | fn from(err: &'e ast::Error) -> Self { | 
|---|
| 69 | Formatter { | 
|---|
| 70 | pattern: err.pattern(), | 
|---|
| 71 | err: err.kind(), | 
|---|
| 72 | span: err.span(), | 
|---|
| 73 | aux_span: err.auxiliary_span(), | 
|---|
| 74 | } | 
|---|
| 75 | } | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> { | 
|---|
| 79 | fn from(err: &'e hir::Error) -> Self { | 
|---|
| 80 | Formatter { | 
|---|
| 81 | pattern: err.pattern(), | 
|---|
| 82 | err: err.kind(), | 
|---|
| 83 | span: err.span(), | 
|---|
| 84 | aux_span: None, | 
|---|
| 85 | } | 
|---|
| 86 | } | 
|---|
| 87 | } | 
|---|
| 88 |  | 
|---|
| 89 | impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> { | 
|---|
| 90 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { | 
|---|
| 91 | let spans = Spans::from_formatter(self); | 
|---|
| 92 | if self.pattern.contains( '\n ') { | 
|---|
| 93 | let divider = repeat_char( '~', 79); | 
|---|
| 94 |  | 
|---|
| 95 | writeln!(f, "regex parse error:")?; | 
|---|
| 96 | writeln!(f, "{} ", divider)?; | 
|---|
| 97 | let notated = spans.notate(); | 
|---|
| 98 | write!(f, "{} ", notated)?; | 
|---|
| 99 | writeln!(f, "{} ", divider)?; | 
|---|
| 100 | // If we have error spans that cover multiple lines, then we just | 
|---|
| 101 | // note the line numbers. | 
|---|
| 102 | if !spans.multi_line.is_empty() { | 
|---|
| 103 | let mut notes = vec![]; | 
|---|
| 104 | for span in &spans.multi_line { | 
|---|
| 105 | notes.push(format!( | 
|---|
| 106 | "on line {}  (column {} ) through line {}  (column {} )", | 
|---|
| 107 | span.start.line, | 
|---|
| 108 | span.start.column, | 
|---|
| 109 | span.end.line, | 
|---|
| 110 | span.end.column - 1 | 
|---|
| 111 | )); | 
|---|
| 112 | } | 
|---|
| 113 | writeln!(f, "{} ", notes.join( "\n "))?; | 
|---|
| 114 | } | 
|---|
| 115 | write!(f, "error: {} ", self.err)?; | 
|---|
| 116 | } else { | 
|---|
| 117 | writeln!(f, "regex parse error:")?; | 
|---|
| 118 | let notated = Spans::from_formatter(self).notate(); | 
|---|
| 119 | write!(f, "{} ", notated)?; | 
|---|
| 120 | write!(f, "error: {} ", self.err)?; | 
|---|
| 121 | } | 
|---|
| 122 | Ok(()) | 
|---|
| 123 | } | 
|---|
| 124 | } | 
|---|
| 125 |  | 
|---|
| 126 | /// This type represents an arbitrary number of error spans in a way that makes | 
|---|
| 127 | /// it convenient to notate the regex pattern. ("Notate" means "point out | 
|---|
| 128 | /// exactly where the error occurred in the regex pattern.") | 
|---|
| 129 | /// | 
|---|
| 130 | /// Technically, we can only ever have two spans given our current error | 
|---|
| 131 | /// structure. However, after toiling with a specific algorithm for handling | 
|---|
| 132 | /// two spans, it became obvious that an algorithm to handle an arbitrary | 
|---|
| 133 | /// number of spans was actually much simpler. | 
|---|
| 134 | struct Spans<'p> { | 
|---|
| 135 | /// The original regex pattern string. | 
|---|
| 136 | pattern: &'p str, | 
|---|
| 137 | /// The total width that should be used for line numbers. The width is | 
|---|
| 138 | /// used for left padding the line numbers for alignment. | 
|---|
| 139 | /// | 
|---|
| 140 | /// A value of `0` means line numbers should not be displayed. That is, | 
|---|
| 141 | /// the pattern is itself only one line. | 
|---|
| 142 | line_number_width: usize, | 
|---|
| 143 | /// All error spans that occur on a single line. This sequence always has | 
|---|
| 144 | /// length equivalent to the number of lines in `pattern`, where the index | 
|---|
| 145 | /// of the sequence represents a line number, starting at `0`. The spans | 
|---|
| 146 | /// in each line are sorted in ascending order. | 
|---|
| 147 | by_line: Vec<Vec<ast::Span>>, | 
|---|
| 148 | /// All error spans that occur over one or more lines. That is, the start | 
|---|
| 149 | /// and end position of the span have different line numbers. The spans are | 
|---|
| 150 | /// sorted in ascending order. | 
|---|
| 151 | multi_line: Vec<ast::Span>, | 
|---|
| 152 | } | 
|---|
| 153 |  | 
|---|
| 154 | impl<'p> Spans<'p> { | 
|---|
| 155 | /// Build a sequence of spans from a formatter. | 
|---|
| 156 | fn from_formatter<'e, E: core::fmt::Display>( | 
|---|
| 157 | fmter: &'p Formatter<'e, E>, | 
|---|
| 158 | ) -> Spans<'p> { | 
|---|
| 159 | let mut line_count = fmter.pattern.lines().count(); | 
|---|
| 160 | // If the pattern ends with a `\n` literal, then our line count is | 
|---|
| 161 | // off by one, since a span can occur immediately after the last `\n`, | 
|---|
| 162 | // which is consider to be an additional line. | 
|---|
| 163 | if fmter.pattern.ends_with( '\n ') { | 
|---|
| 164 | line_count += 1; | 
|---|
| 165 | } | 
|---|
| 166 | let line_number_width = | 
|---|
| 167 | if line_count <= 1 { 0 } else { line_count.to_string().len() }; | 
|---|
| 168 | let mut spans = Spans { | 
|---|
| 169 | pattern: &fmter.pattern, | 
|---|
| 170 | line_number_width, | 
|---|
| 171 | by_line: vec![vec![]; line_count], | 
|---|
| 172 | multi_line: vec![], | 
|---|
| 173 | }; | 
|---|
| 174 | spans.add(fmter.span.clone()); | 
|---|
| 175 | if let Some(span) = fmter.aux_span { | 
|---|
| 176 | spans.add(span.clone()); | 
|---|
| 177 | } | 
|---|
| 178 | spans | 
|---|
| 179 | } | 
|---|
| 180 |  | 
|---|
| 181 | /// Add the given span to this sequence, putting it in the right place. | 
|---|
| 182 | fn add(&mut self, span: ast::Span) { | 
|---|
| 183 | // This is grossly inefficient since we sort after each add, but right | 
|---|
| 184 | // now, we only ever add two spans at most. | 
|---|
| 185 | if span.is_one_line() { | 
|---|
| 186 | let i = span.start.line - 1; // because lines are 1-indexed | 
|---|
| 187 | self.by_line[i].push(span); | 
|---|
| 188 | self.by_line[i].sort(); | 
|---|
| 189 | } else { | 
|---|
| 190 | self.multi_line.push(span); | 
|---|
| 191 | self.multi_line.sort(); | 
|---|
| 192 | } | 
|---|
| 193 | } | 
|---|
| 194 |  | 
|---|
| 195 | /// Notate the pattern string with carents (`^`) pointing at each span | 
|---|
| 196 | /// location. This only applies to spans that occur within a single line. | 
|---|
| 197 | fn notate(&self) -> String { | 
|---|
| 198 | let mut notated = String::new(); | 
|---|
| 199 | for (i, line) in self.pattern.lines().enumerate() { | 
|---|
| 200 | if self.line_number_width > 0 { | 
|---|
| 201 | notated.push_str(&self.left_pad_line_number(i + 1)); | 
|---|
| 202 | notated.push_str( ": "); | 
|---|
| 203 | } else { | 
|---|
| 204 | notated.push_str( "    "); | 
|---|
| 205 | } | 
|---|
| 206 | notated.push_str(line); | 
|---|
| 207 | notated.push( '\n '); | 
|---|
| 208 | if let Some(notes) = self.notate_line(i) { | 
|---|
| 209 | notated.push_str(¬es); | 
|---|
| 210 | notated.push( '\n '); | 
|---|
| 211 | } | 
|---|
| 212 | } | 
|---|
| 213 | notated | 
|---|
| 214 | } | 
|---|
| 215 |  | 
|---|
| 216 | /// Return notes for the line indexed at `i` (zero-based). If there are no | 
|---|
| 217 | /// spans for the given line, then `None` is returned. Otherwise, an | 
|---|
| 218 | /// appropriately space padded string with correctly positioned `^` is | 
|---|
| 219 | /// returned, accounting for line numbers. | 
|---|
| 220 | fn notate_line(&self, i: usize) -> Option<String> { | 
|---|
| 221 | let spans = &self.by_line[i]; | 
|---|
| 222 | if spans.is_empty() { | 
|---|
| 223 | return None; | 
|---|
| 224 | } | 
|---|
| 225 | let mut notes = String::new(); | 
|---|
| 226 | for _ in 0..self.line_number_padding() { | 
|---|
| 227 | notes.push( ' '); | 
|---|
| 228 | } | 
|---|
| 229 | let mut pos = 0; | 
|---|
| 230 | for span in spans { | 
|---|
| 231 | for _ in pos..(span.start.column - 1) { | 
|---|
| 232 | notes.push( ' '); | 
|---|
| 233 | pos += 1; | 
|---|
| 234 | } | 
|---|
| 235 | let note_len = span.end.column.saturating_sub(span.start.column); | 
|---|
| 236 | for _ in 0..core::cmp::max(1, note_len) { | 
|---|
| 237 | notes.push( '^'); | 
|---|
| 238 | pos += 1; | 
|---|
| 239 | } | 
|---|
| 240 | } | 
|---|
| 241 | Some(notes) | 
|---|
| 242 | } | 
|---|
| 243 |  | 
|---|
| 244 | /// Left pad the given line number with spaces such that it is aligned with | 
|---|
| 245 | /// other line numbers. | 
|---|
| 246 | fn left_pad_line_number(&self, n: usize) -> String { | 
|---|
| 247 | let n = n.to_string(); | 
|---|
| 248 | let pad = self.line_number_width.checked_sub(n.len()).unwrap(); | 
|---|
| 249 | let mut result = repeat_char( ' ', pad); | 
|---|
| 250 | result.push_str(&n); | 
|---|
| 251 | result | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|
| 254 | /// Return the line number padding beginning at the start of each line of | 
|---|
| 255 | /// the pattern. | 
|---|
| 256 | /// | 
|---|
| 257 | /// If the pattern is only one line, then this returns a fixed padding | 
|---|
| 258 | /// for visual indentation. | 
|---|
| 259 | fn line_number_padding(&self) -> usize { | 
|---|
| 260 | if self.line_number_width == 0 { | 
|---|
| 261 | 4 | 
|---|
| 262 | } else { | 
|---|
| 263 | 2 + self.line_number_width | 
|---|
| 264 | } | 
|---|
| 265 | } | 
|---|
| 266 | } | 
|---|
| 267 |  | 
|---|
/// Returns a string made of `c` repeated `count` times.
///
/// A `count` of `0` yields the empty string.
fn repeat_char(c: char, count: usize) -> String {
    core::iter::repeat(c).take(count).collect()
}
|---|
| 271 |  | 
|---|
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::ast::parse::Parser;

    /// Asserts that `pattern` fails to parse and that the rendered error
    /// message equals `expected_msg` with its surrounding whitespace trimmed.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        match Parser::new().parse(pattern) {
            Ok(_) => panic!("regex should not have parsed"),
            Err(err) => assert_eq!(err.to_string(), expected_msg.trim()),
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        // The pattern deliberately ends with a line break, so the error can
        // sit on the (empty) line after the final `\n`.
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}
|---|
| 312 |  | 
|---|