Chih-Hung Hsieh | 048fc04 | 2020-04-16 10:44:22 -0700 | [diff] [blame] | 1 | use std::cmp; |
| 2 | use std::error; |
| 3 | use std::fmt; |
| 4 | use std::result; |
| 5 | |
| 6 | use ast; |
| 7 | use hir; |
| 8 | |
| 9 | /// A type alias for dealing with errors returned by this crate. |
| 10 | pub type Result<T> = result::Result<T, Error>; |
| 11 | |
| 12 | /// This error type encompasses any error that can be returned by this crate. |
| 13 | #[derive(Clone, Debug, Eq, PartialEq)] |
| 14 | pub enum Error { |
| 15 | /// An error that occurred while translating concrete syntax into abstract |
| 16 | /// syntax (AST). |
| 17 | Parse(ast::Error), |
| 18 | /// An error that occurred while translating abstract syntax into a high |
| 19 | /// level intermediate representation (HIR). |
| 20 | Translate(hir::Error), |
| 21 | /// Hints that destructuring should not be exhaustive. |
| 22 | /// |
| 23 | /// This enum may grow additional variants, so this makes sure clients |
| 24 | /// don't count on exhaustive matching. (Otherwise, adding a new variant |
| 25 | /// could break existing code.) |
| 26 | #[doc(hidden)] |
| 27 | __Nonexhaustive, |
| 28 | } |
| 29 | |
| 30 | impl From<ast::Error> for Error { |
| 31 | fn from(err: ast::Error) -> Error { |
| 32 | Error::Parse(err) |
| 33 | } |
| 34 | } |
| 35 | |
| 36 | impl From<hir::Error> for Error { |
| 37 | fn from(err: hir::Error) -> Error { |
| 38 | Error::Translate(err) |
| 39 | } |
| 40 | } |
| 41 | |
| 42 | impl error::Error for Error { |
| 43 | // TODO: Remove this method entirely on the next breaking semver release. |
| 44 | #[allow(deprecated)] |
| 45 | fn description(&self) -> &str { |
| 46 | match *self { |
| 47 | Error::Parse(ref x) => x.description(), |
| 48 | Error::Translate(ref x) => x.description(), |
| 49 | _ => unreachable!(), |
| 50 | } |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | impl fmt::Display for Error { |
| 55 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 56 | match *self { |
| 57 | Error::Parse(ref x) => x.fmt(f), |
| 58 | Error::Translate(ref x) => x.fmt(f), |
| 59 | _ => unreachable!(), |
| 60 | } |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | /// A helper type for formatting nice error messages. |
| 65 | /// |
| 66 | /// This type is responsible for reporting regex parse errors in a nice human |
| 67 | /// readable format. Most of its complexity is from interspersing notational |
| 68 | /// markers pointing out the position where an error occurred. |
| 69 | #[derive(Debug)] |
| 70 | pub struct Formatter<'e, E: 'e> { |
| 71 | /// The original regex pattern in which the error occurred. |
| 72 | pattern: &'e str, |
| 73 | /// The error kind. It must impl fmt::Display. |
| 74 | err: &'e E, |
| 75 | /// The primary span of the error. |
| 76 | span: &'e ast::Span, |
| 77 | /// An auxiliary and optional span, in case the error needs to point to |
| 78 | /// two locations (e.g., when reporting a duplicate capture group name). |
| 79 | aux_span: Option<&'e ast::Span>, |
| 80 | } |
| 81 | |
| 82 | impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> { |
| 83 | fn from(err: &'e ast::Error) -> Self { |
| 84 | Formatter { |
| 85 | pattern: err.pattern(), |
| 86 | err: err.kind(), |
| 87 | span: err.span(), |
| 88 | aux_span: err.auxiliary_span(), |
| 89 | } |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> { |
| 94 | fn from(err: &'e hir::Error) -> Self { |
| 95 | Formatter { |
| 96 | pattern: err.pattern(), |
| 97 | err: err.kind(), |
| 98 | span: err.span(), |
| 99 | aux_span: None, |
| 100 | } |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> { |
| 105 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 106 | let spans = Spans::from_formatter(self); |
| 107 | if self.pattern.contains('\n') { |
| 108 | let divider = repeat_char('~', 79); |
| 109 | |
| 110 | writeln!(f, "regex parse error:")?; |
| 111 | writeln!(f, "{}", divider)?; |
| 112 | let notated = spans.notate(); |
| 113 | write!(f, "{}", notated)?; |
| 114 | writeln!(f, "{}", divider)?; |
| 115 | // If we have error spans that cover multiple lines, then we just |
| 116 | // note the line numbers. |
| 117 | if !spans.multi_line.is_empty() { |
| 118 | let mut notes = vec![]; |
| 119 | for span in &spans.multi_line { |
| 120 | notes.push(format!( |
| 121 | "on line {} (column {}) through line {} (column {})", |
| 122 | span.start.line, |
| 123 | span.start.column, |
| 124 | span.end.line, |
| 125 | span.end.column - 1 |
| 126 | )); |
| 127 | } |
| 128 | writeln!(f, "{}", notes.join("\n"))?; |
| 129 | } |
| 130 | write!(f, "error: {}", self.err)?; |
| 131 | } else { |
| 132 | writeln!(f, "regex parse error:")?; |
| 133 | let notated = Spans::from_formatter(self).notate(); |
| 134 | write!(f, "{}", notated)?; |
| 135 | write!(f, "error: {}", self.err)?; |
| 136 | } |
| 137 | Ok(()) |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | /// This type represents an arbitrary number of error spans in a way that makes |
| 142 | /// it convenient to notate the regex pattern. ("Notate" means "point out |
| 143 | /// exactly where the error occurred in the regex pattern.") |
| 144 | /// |
| 145 | /// Technically, we can only ever have two spans given our current error |
| 146 | /// structure. However, after toiling with a specific algorithm for handling |
| 147 | /// two spans, it became obvious that an algorithm to handle an arbitrary |
| 148 | /// number of spans was actually much simpler. |
| 149 | struct Spans<'p> { |
| 150 | /// The original regex pattern string. |
| 151 | pattern: &'p str, |
| 152 | /// The total width that should be used for line numbers. The width is |
| 153 | /// used for left padding the line numbers for alignment. |
| 154 | /// |
| 155 | /// A value of `0` means line numbers should not be displayed. That is, |
| 156 | /// the pattern is itself only one line. |
| 157 | line_number_width: usize, |
| 158 | /// All error spans that occur on a single line. This sequence always has |
| 159 | /// length equivalent to the number of lines in `pattern`, where the index |
| 160 | /// of the sequence represents a line number, starting at `0`. The spans |
| 161 | /// in each line are sorted in ascending order. |
| 162 | by_line: Vec<Vec<ast::Span>>, |
| 163 | /// All error spans that occur over one or more lines. That is, the start |
| 164 | /// and end position of the span have different line numbers. The spans are |
| 165 | /// sorted in ascending order. |
| 166 | multi_line: Vec<ast::Span>, |
| 167 | } |
| 168 | |
| 169 | impl<'p> Spans<'p> { |
| 170 | /// Build a sequence of spans from a formatter. |
| 171 | fn from_formatter<'e, E: fmt::Display>( |
| 172 | fmter: &'p Formatter<'e, E>, |
| 173 | ) -> Spans<'p> { |
| 174 | let mut line_count = fmter.pattern.lines().count(); |
| 175 | // If the pattern ends with a `\n` literal, then our line count is |
| 176 | // off by one, since a span can occur immediately after the last `\n`, |
| 177 | // which is consider to be an additional line. |
| 178 | if fmter.pattern.ends_with('\n') { |
| 179 | line_count += 1; |
| 180 | } |
| 181 | let line_number_width = |
| 182 | if line_count <= 1 { 0 } else { line_count.to_string().len() }; |
| 183 | let mut spans = Spans { |
| 184 | pattern: &fmter.pattern, |
| 185 | line_number_width: line_number_width, |
| 186 | by_line: vec![vec![]; line_count], |
| 187 | multi_line: vec![], |
| 188 | }; |
| 189 | spans.add(fmter.span.clone()); |
| 190 | if let Some(span) = fmter.aux_span { |
| 191 | spans.add(span.clone()); |
| 192 | } |
| 193 | spans |
| 194 | } |
| 195 | |
| 196 | /// Add the given span to this sequence, putting it in the right place. |
| 197 | fn add(&mut self, span: ast::Span) { |
| 198 | // This is grossly inefficient since we sort after each add, but right |
| 199 | // now, we only ever add two spans at most. |
| 200 | if span.is_one_line() { |
| 201 | let i = span.start.line - 1; // because lines are 1-indexed |
| 202 | self.by_line[i].push(span); |
| 203 | self.by_line[i].sort(); |
| 204 | } else { |
| 205 | self.multi_line.push(span); |
| 206 | self.multi_line.sort(); |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | /// Notate the pattern string with carents (`^`) pointing at each span |
| 211 | /// location. This only applies to spans that occur within a single line. |
| 212 | fn notate(&self) -> String { |
| 213 | let mut notated = String::new(); |
| 214 | for (i, line) in self.pattern.lines().enumerate() { |
| 215 | if self.line_number_width > 0 { |
| 216 | notated.push_str(&self.left_pad_line_number(i + 1)); |
| 217 | notated.push_str(": "); |
| 218 | } else { |
| 219 | notated.push_str(" "); |
| 220 | } |
| 221 | notated.push_str(line); |
| 222 | notated.push('\n'); |
| 223 | if let Some(notes) = self.notate_line(i) { |
| 224 | notated.push_str(¬es); |
| 225 | notated.push('\n'); |
| 226 | } |
| 227 | } |
| 228 | notated |
| 229 | } |
| 230 | |
| 231 | /// Return notes for the line indexed at `i` (zero-based). If there are no |
| 232 | /// spans for the given line, then `None` is returned. Otherwise, an |
| 233 | /// appropriately space padded string with correctly positioned `^` is |
| 234 | /// returned, accounting for line numbers. |
| 235 | fn notate_line(&self, i: usize) -> Option<String> { |
| 236 | let spans = &self.by_line[i]; |
| 237 | if spans.is_empty() { |
| 238 | return None; |
| 239 | } |
| 240 | let mut notes = String::new(); |
| 241 | for _ in 0..self.line_number_padding() { |
| 242 | notes.push(' '); |
| 243 | } |
| 244 | let mut pos = 0; |
| 245 | for span in spans { |
| 246 | for _ in pos..(span.start.column - 1) { |
| 247 | notes.push(' '); |
| 248 | pos += 1; |
| 249 | } |
| 250 | let note_len = span.end.column.saturating_sub(span.start.column); |
| 251 | for _ in 0..cmp::max(1, note_len) { |
| 252 | notes.push('^'); |
| 253 | pos += 1; |
| 254 | } |
| 255 | } |
| 256 | Some(notes) |
| 257 | } |
| 258 | |
| 259 | /// Left pad the given line number with spaces such that it is aligned with |
| 260 | /// other line numbers. |
| 261 | fn left_pad_line_number(&self, n: usize) -> String { |
| 262 | let n = n.to_string(); |
| 263 | let pad = self.line_number_width.checked_sub(n.len()).unwrap(); |
| 264 | let mut result = repeat_char(' ', pad); |
| 265 | result.push_str(&n); |
| 266 | result |
| 267 | } |
| 268 | |
| 269 | /// Return the line number padding beginning at the start of each line of |
| 270 | /// the pattern. |
| 271 | /// |
| 272 | /// If the pattern is only one line, then this returns a fixed padding |
| 273 | /// for visual indentation. |
| 274 | fn line_number_padding(&self) -> usize { |
| 275 | if self.line_number_width == 0 { |
| 276 | 4 |
| 277 | } else { |
| 278 | 2 + self.line_number_width |
| 279 | } |
| 280 | } |
| 281 | } |
| 282 | |
/// Returns a string made of `count` copies of the character `c`.
fn repeat_char(c: char, count: usize) -> String {
    let mut repeated = String::new();
    for _ in 0..count {
        repeated.push(c);
    }
    repeated
}
| 286 | |
#[cfg(test)]
mod tests {
    use ast::parse::Parser;

    /// Parses `pattern`, which must be rejected, and asserts that the
    /// displayed error equals `expected_msg` once leading and trailing
    /// whitespace is trimmed from `expected_msg`.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        match Parser::new().parse(pattern) {
            Ok(_) => panic!("regex should not have parsed"),
            Err(err) => assert_eq!(err.to_string(), expected_msg.trim()),
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    //
    // The whitespace inside the expected message is significant: for a
    // single-line pattern the pattern is indented by four columns, and the
    // caret is indented by those four columns plus its offset into the
    // pattern.
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}