1 | //! When serializing or deserializing JSON goes wrong. |
2 | |
3 | use crate::io; |
4 | use alloc::boxed::Box; |
5 | use alloc::string::{String, ToString}; |
6 | use core::fmt::{self, Debug, Display}; |
7 | use core::result; |
8 | use core::str::FromStr; |
9 | use serde::{de, ser}; |
10 | #[cfg (feature = "std" )] |
11 | use std::error; |
12 | #[cfg (feature = "std" )] |
13 | use std::io::ErrorKind; |
14 | |
/// This type represents all possible errors that can occur when serializing or
/// deserializing JSON data.
///
/// The kind of failure is reported by `Error::classify`, and the position at
/// which it was detected by `Error::line` and `Error::column`.
pub struct Error {
    /// This `Box` allows us to keep the size of `Error` as small as possible. A
    /// larger `Error` type was substantially slower due to all the functions
    /// that pass around `Result<T, Error>`.
    err: Box<ErrorImpl>,
}
23 | |
/// Alias for a `Result` with the error type `serde_json::Error`.
///
/// `Result<T>` is shorthand for `core::result::Result<T, Error>`.
pub type Result<T> = result::Result<T, Error>;
26 | |
27 | impl Error { |
28 | /// One-based line number at which the error was detected. |
29 | /// |
30 | /// Characters in the first line of the input (before the first newline |
31 | /// character) are in line 1. |
32 | pub fn line(&self) -> usize { |
33 | self.err.line |
34 | } |
35 | |
36 | /// One-based column number at which the error was detected. |
37 | /// |
38 | /// The first character in the input and any characters immediately |
39 | /// following a newline character are in column 1. |
40 | /// |
41 | /// Note that errors may occur in column 0, for example if a read from an |
42 | /// I/O stream fails immediately following a previously read newline |
43 | /// character. |
44 | pub fn column(&self) -> usize { |
45 | self.err.column |
46 | } |
47 | |
48 | /// Categorizes the cause of this error. |
49 | /// |
50 | /// - `Category::Io` - failure to read or write bytes on an I/O stream |
51 | /// - `Category::Syntax` - input that is not syntactically valid JSON |
52 | /// - `Category::Data` - input data that is semantically incorrect |
53 | /// - `Category::Eof` - unexpected end of the input data |
54 | pub fn classify(&self) -> Category { |
55 | match self.err.code { |
56 | ErrorCode::Message(_) => Category::Data, |
57 | ErrorCode::Io(_) => Category::Io, |
58 | ErrorCode::EofWhileParsingList |
59 | | ErrorCode::EofWhileParsingObject |
60 | | ErrorCode::EofWhileParsingString |
61 | | ErrorCode::EofWhileParsingValue => Category::Eof, |
62 | ErrorCode::ExpectedColon |
63 | | ErrorCode::ExpectedListCommaOrEnd |
64 | | ErrorCode::ExpectedObjectCommaOrEnd |
65 | | ErrorCode::ExpectedSomeIdent |
66 | | ErrorCode::ExpectedSomeValue |
67 | | ErrorCode::ExpectedDoubleQuote |
68 | | ErrorCode::InvalidEscape |
69 | | ErrorCode::InvalidNumber |
70 | | ErrorCode::NumberOutOfRange |
71 | | ErrorCode::InvalidUnicodeCodePoint |
72 | | ErrorCode::ControlCharacterWhileParsingString |
73 | | ErrorCode::KeyMustBeAString |
74 | | ErrorCode::ExpectedNumericKey |
75 | | ErrorCode::FloatKeyMustBeFinite |
76 | | ErrorCode::LoneLeadingSurrogateInHexEscape |
77 | | ErrorCode::TrailingComma |
78 | | ErrorCode::TrailingCharacters |
79 | | ErrorCode::UnexpectedEndOfHexEscape |
80 | | ErrorCode::RecursionLimitExceeded => Category::Syntax, |
81 | } |
82 | } |
83 | |
84 | /// Returns true if this error was caused by a failure to read or write |
85 | /// bytes on an I/O stream. |
86 | pub fn is_io(&self) -> bool { |
87 | self.classify() == Category::Io |
88 | } |
89 | |
90 | /// Returns true if this error was caused by input that was not |
91 | /// syntactically valid JSON. |
92 | pub fn is_syntax(&self) -> bool { |
93 | self.classify() == Category::Syntax |
94 | } |
95 | |
96 | /// Returns true if this error was caused by input data that was |
97 | /// semantically incorrect. |
98 | /// |
99 | /// For example, JSON containing a number is semantically incorrect when the |
100 | /// type being deserialized into holds a String. |
101 | pub fn is_data(&self) -> bool { |
102 | self.classify() == Category::Data |
103 | } |
104 | |
105 | /// Returns true if this error was caused by prematurely reaching the end of |
106 | /// the input data. |
107 | /// |
108 | /// Callers that process streaming input may be interested in retrying the |
109 | /// deserialization once more data is available. |
110 | pub fn is_eof(&self) -> bool { |
111 | self.classify() == Category::Eof |
112 | } |
113 | |
114 | /// The kind reported by the underlying standard library I/O error, if this |
115 | /// error was caused by a failure to read or write bytes on an I/O stream. |
116 | /// |
117 | /// # Example |
118 | /// |
119 | /// ``` |
120 | /// use serde_json::Value; |
121 | /// use std::io::{self, ErrorKind, Read}; |
122 | /// use std::process; |
123 | /// |
124 | /// struct ReaderThatWillTimeOut<'a>(&'a [u8]); |
125 | /// |
126 | /// impl<'a> Read for ReaderThatWillTimeOut<'a> { |
127 | /// fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
128 | /// if self.0.is_empty() { |
129 | /// Err(io::Error::new(ErrorKind::TimedOut, "timed out" )) |
130 | /// } else { |
131 | /// self.0.read(buf) |
132 | /// } |
133 | /// } |
134 | /// } |
135 | /// |
136 | /// fn main() { |
137 | /// let reader = ReaderThatWillTimeOut(br#" {"k": "# ); |
138 | /// |
139 | /// let _: Value = match serde_json::from_reader(reader) { |
140 | /// Ok(value) => value, |
141 | /// Err(error) => { |
142 | /// if error.io_error_kind() == Some(ErrorKind::TimedOut) { |
143 | /// // Maybe this application needs to retry certain kinds of errors. |
144 | /// |
145 | /// # return; |
146 | /// } else { |
147 | /// eprintln!("error: {}" , error); |
148 | /// process::exit(1); |
149 | /// } |
150 | /// } |
151 | /// }; |
152 | /// } |
153 | /// ``` |
154 | #[cfg (feature = "std" )] |
155 | pub fn io_error_kind(&self) -> Option<ErrorKind> { |
156 | if let ErrorCode::Io(io_error) = &self.err.code { |
157 | Some(io_error.kind()) |
158 | } else { |
159 | None |
160 | } |
161 | } |
162 | } |
163 | |
/// Categorizes the cause of a `serde_json::Error`.
///
/// Values of this type are produced by `Error::classify`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Category {
    /// The error was caused by a failure to read or write bytes on an I/O
    /// stream.
    Io,

    /// The error was caused by input that was not syntactically valid JSON.
    Syntax,

    /// The error was caused by input data that was semantically incorrect.
    ///
    /// For example, JSON containing a number is semantically incorrect when the
    /// type being deserialized into holds a String.
    Data,

    /// The error was caused by prematurely reaching the end of the input data.
    ///
    /// Callers that process streaming input may be interested in retrying the
    /// deserialization once more data is available.
    Eof,
}
186 | |
#[cfg(feature = "std")]
#[allow(clippy::fallible_impl_from)]
impl From<Error> for io::Error {
    /// Convert a `serde_json::Error` into an `io::Error`.
    ///
    /// An error that wraps an I/O error is unwrapped and that I/O error is
    /// returned as is. JSON syntax and data errors are turned into
    /// `InvalidData` I/O errors. EOF errors are turned into `UnexpectedEof`
    /// I/O errors.
    ///
    /// ```
    /// use std::io;
    ///
    /// enum MyError {
    ///     Io(io::Error),
    ///     Json(serde_json::Error),
    /// }
    ///
    /// impl From<serde_json::Error> for MyError {
    ///     fn from(err: serde_json::Error) -> MyError {
    ///         use serde_json::error::Category;
    ///         match err.classify() {
    ///             Category::Io => {
    ///                 MyError::Io(err.into())
    ///             }
    ///             Category::Syntax | Category::Data | Category::Eof => {
    ///                 MyError::Json(err)
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    fn from(j: Error) -> Self {
        match j.err.code {
            // Already an I/O error: hand it back without re-wrapping.
            ErrorCode::Io(err) => err,
            _ => {
                let kind = match j.classify() {
                    Category::Eof => ErrorKind::UnexpectedEof,
                    Category::Syntax | Category::Data => ErrorKind::InvalidData,
                    // `classify` yields `Io` only for `ErrorCode::Io`, which
                    // the outer match has already handled.
                    Category::Io => unreachable!(),
                };
                io::Error::new(kind, j)
            }
        }
    }
}
229 | |
/// Boxed payload of `Error`: the error code together with the position at
/// which the error was detected.
struct ErrorImpl {
    /// What went wrong.
    code: ErrorCode,
    /// Line number returned by `Error::line`. Errors built by `Error::io` use
    /// 0, and a value of 0 makes `Display` omit the position suffix.
    line: usize,
    /// Column number returned by `Error::column`.
    column: usize,
}
235 | |
/// The specific failure carried by an `Error`.
///
/// `Error::classify` maps every variant to a `Category`, and the `Display`
/// impl provides the human-readable text for each one.
pub(crate) enum ErrorCode {
    /// Free-form error message, as built by `make_error` for the `custom`
    /// constructors of `de::Error` and `ser::Error`. Classified as
    /// `Category::Data`.
    Message(Box<str>),

    /// Some I/O error occurred while serializing or deserializing.
    Io(io::Error),

    /// EOF while parsing a list.
    EofWhileParsingList,

    /// EOF while parsing an object.
    EofWhileParsingObject,

    /// EOF while parsing a string.
    EofWhileParsingString,

    /// EOF while parsing a JSON value.
    EofWhileParsingValue,

    /// Expected this character to be a `':'`.
    ExpectedColon,

    /// Expected this character to be either a `','` or a `']'`.
    ExpectedListCommaOrEnd,

    /// Expected this character to be either a `','` or a `'}'`.
    ExpectedObjectCommaOrEnd,

    /// Expected to parse either a `true`, `false`, or a `null`.
    ExpectedSomeIdent,

    /// Expected this character to start a JSON value.
    ExpectedSomeValue,

    /// Expected this character to be a `"`.
    ExpectedDoubleQuote,

    /// Invalid hex escape code.
    InvalidEscape,

    /// Invalid number.
    InvalidNumber,

    /// Number is bigger than the maximum value of its type.
    NumberOutOfRange,

    /// Invalid unicode code point.
    InvalidUnicodeCodePoint,

    /// Control character found while parsing a string.
    ControlCharacterWhileParsingString,

    /// Object key is not a string.
    KeyMustBeAString,

    /// Contents of key were supposed to be a number.
    ExpectedNumericKey,

    /// Object key is a non-finite float value.
    FloatKeyMustBeFinite,

    /// Lone leading surrogate in hex escape.
    LoneLeadingSurrogateInHexEscape,

    /// JSON has a comma after the last value in an array or map.
    TrailingComma,

    /// JSON has non-whitespace trailing characters after the value.
    TrailingCharacters,

    /// Unexpected end of hex escape.
    UnexpectedEndOfHexEscape,

    /// Encountered nesting of JSON maps and arrays more than 128 layers deep.
    RecursionLimitExceeded,
}
312 | |
313 | impl Error { |
314 | #[cold ] |
315 | pub(crate) fn syntax(code: ErrorCode, line: usize, column: usize) -> Self { |
316 | Error { |
317 | err: Box::new(ErrorImpl { code, line, column }), |
318 | } |
319 | } |
320 | |
321 | // Not public API. Should be pub(crate). |
322 | // |
323 | // Update `eager_json` crate when this function changes. |
324 | #[doc (hidden)] |
325 | #[cold ] |
326 | pub fn io(error: io::Error) -> Self { |
327 | Error { |
328 | err: Box::new(ErrorImpl { |
329 | code: ErrorCode::Io(error), |
330 | line: 0, |
331 | column: 0, |
332 | }), |
333 | } |
334 | } |
335 | |
336 | #[cold ] |
337 | pub(crate) fn fix_position<F>(self, f: F) -> Self |
338 | where |
339 | F: FnOnce(ErrorCode) -> Error, |
340 | { |
341 | if self.err.line == 0 { |
342 | f(self.err.code) |
343 | } else { |
344 | self |
345 | } |
346 | } |
347 | } |
348 | |
349 | impl Display for ErrorCode { |
350 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
351 | match self { |
352 | ErrorCode::Message(msg) => f.write_str(msg), |
353 | ErrorCode::Io(err) => Display::fmt(err, f), |
354 | ErrorCode::EofWhileParsingList => f.write_str("EOF while parsing a list" ), |
355 | ErrorCode::EofWhileParsingObject => f.write_str("EOF while parsing an object" ), |
356 | ErrorCode::EofWhileParsingString => f.write_str("EOF while parsing a string" ), |
357 | ErrorCode::EofWhileParsingValue => f.write_str("EOF while parsing a value" ), |
358 | ErrorCode::ExpectedColon => f.write_str("expected `:`" ), |
359 | ErrorCode::ExpectedListCommaOrEnd => f.write_str("expected `,` or `]`" ), |
360 | ErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`" ), |
361 | ErrorCode::ExpectedSomeIdent => f.write_str("expected ident" ), |
362 | ErrorCode::ExpectedSomeValue => f.write_str("expected value" ), |
363 | ErrorCode::ExpectedDoubleQuote => f.write_str("expected ` \"`" ), |
364 | ErrorCode::InvalidEscape => f.write_str("invalid escape" ), |
365 | ErrorCode::InvalidNumber => f.write_str("invalid number" ), |
366 | ErrorCode::NumberOutOfRange => f.write_str("number out of range" ), |
367 | ErrorCode::InvalidUnicodeCodePoint => f.write_str("invalid unicode code point" ), |
368 | ErrorCode::ControlCharacterWhileParsingString => { |
369 | f.write_str("control character ( \\u0000- \\u001F) found while parsing a string" ) |
370 | } |
371 | ErrorCode::KeyMustBeAString => f.write_str("key must be a string" ), |
372 | ErrorCode::ExpectedNumericKey => { |
373 | f.write_str("invalid value: expected key to be a number in quotes" ) |
374 | } |
375 | ErrorCode::FloatKeyMustBeFinite => { |
376 | f.write_str("float key must be finite (got NaN or +/-inf)" ) |
377 | } |
378 | ErrorCode::LoneLeadingSurrogateInHexEscape => { |
379 | f.write_str("lone leading surrogate in hex escape" ) |
380 | } |
381 | ErrorCode::TrailingComma => f.write_str("trailing comma" ), |
382 | ErrorCode::TrailingCharacters => f.write_str("trailing characters" ), |
383 | ErrorCode::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape" ), |
384 | ErrorCode::RecursionLimitExceeded => f.write_str("recursion limit exceeded" ), |
385 | } |
386 | } |
387 | } |
388 | |
389 | impl serde::de::StdError for Error { |
390 | #[cfg (feature = "std" )] |
391 | fn source(&self) -> Option<&(dyn error::Error + 'static)> { |
392 | match &self.err.code { |
393 | ErrorCode::Io(err) => err.source(), |
394 | _ => None, |
395 | } |
396 | } |
397 | } |
398 | |
399 | impl Display for Error { |
400 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
401 | Display::fmt(&*self.err, f) |
402 | } |
403 | } |
404 | |
405 | impl Display for ErrorImpl { |
406 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
407 | if self.line == 0 { |
408 | Display::fmt(&self.code, f) |
409 | } else { |
410 | write!( |
411 | f, |
412 | "{} at line {} column {}" , |
413 | self.code, self.line, self.column |
414 | ) |
415 | } |
416 | } |
417 | } |
418 | |
419 | // Remove two layers of verbosity from the debug representation. Humans often |
420 | // end up seeing this representation because it is what unwrap() shows. |
421 | impl Debug for Error { |
422 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
423 | write!( |
424 | f, |
425 | "Error({:?}, line: {}, column: {})" , |
426 | self.err.code.to_string(), |
427 | self.err.line, |
428 | self.err.column |
429 | ) |
430 | } |
431 | } |
432 | |
433 | impl de::Error for Error { |
434 | #[cold ] |
435 | fn custom<T: Display>(msg: T) -> Error { |
436 | make_error(msg.to_string()) |
437 | } |
438 | |
439 | #[cold ] |
440 | fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self { |
441 | if let de::Unexpected::Unit = unexp { |
442 | Error::custom(format_args!("invalid type: null, expected {}" , exp)) |
443 | } else { |
444 | Error::custom(format_args!("invalid type: {}, expected {}" , unexp, exp)) |
445 | } |
446 | } |
447 | } |
448 | |
449 | impl ser::Error for Error { |
450 | #[cold ] |
451 | fn custom<T: Display>(msg: T) -> Error { |
452 | make_error(msg.to_string()) |
453 | } |
454 | } |
455 | |
456 | // Parse our own error message that looks like "{} at line {} column {}" to work |
457 | // around erased-serde round-tripping the error through de::Error::custom. |
458 | fn make_error(mut msg: String) -> Error { |
459 | let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0)); |
460 | Error { |
461 | err: Box::new(ErrorImpl { |
462 | code: ErrorCode::Message(msg.into_boxed_str()), |
463 | line, |
464 | column, |
465 | }), |
466 | } |
467 | } |
468 | |
469 | fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> { |
470 | let start_of_suffix = match msg.rfind(" at line " ) { |
471 | Some(index) => index, |
472 | None => return None, |
473 | }; |
474 | |
475 | // Find start and end of line number. |
476 | let start_of_line = start_of_suffix + " at line " .len(); |
477 | let mut end_of_line = start_of_line; |
478 | while starts_with_digit(&msg[end_of_line..]) { |
479 | end_of_line += 1; |
480 | } |
481 | |
482 | if !msg[end_of_line..].starts_with(" column " ) { |
483 | return None; |
484 | } |
485 | |
486 | // Find start and end of column number. |
487 | let start_of_column = end_of_line + " column " .len(); |
488 | let mut end_of_column = start_of_column; |
489 | while starts_with_digit(&msg[end_of_column..]) { |
490 | end_of_column += 1; |
491 | } |
492 | |
493 | if end_of_column < msg.len() { |
494 | return None; |
495 | } |
496 | |
497 | // Parse numbers. |
498 | let line = match usize::from_str(&msg[start_of_line..end_of_line]) { |
499 | Ok(line) => line, |
500 | Err(_) => return None, |
501 | }; |
502 | let column = match usize::from_str(&msg[start_of_column..end_of_column]) { |
503 | Ok(column) => column, |
504 | Err(_) => return None, |
505 | }; |
506 | |
507 | msg.truncate(start_of_suffix); |
508 | Some((line, column)) |
509 | } |
510 | |
/// Returns true when the first byte of `slice` is an ASCII digit (`0`-`9`);
/// an empty slice yields false.
fn starts_with_digit(slice: &str) -> bool {
    slice.as_bytes().first().map_or(false, u8::is_ascii_digit)
}
517 | |