1 | use crate::Encoding; |
2 | use crate::reader::lexer::Token; |
3 | |
4 | use std::borrow::Cow; |
5 | use std::error; |
6 | use std::error::Error as _; |
7 | use std::fmt; |
8 | use std::io; |
9 | use std::str; |
10 | |
11 | use crate::common::{Position, TextPosition}; |
12 | use crate::util; |
13 | |
/// The category of failure carried by an [`Error`].
#[derive(Debug)]
pub enum ErrorKind {
    /// A syntax problem, described by a human-readable message.
    Syntax(Cow<'static, str>),
    /// A wrapped [`std::io::Error`].
    Io(io::Error),
    /// A wrapped [`std::str::Utf8Error`].
    Utf8(str::Utf8Error),
    /// The input ended unexpectedly.
    UnexpectedEof,
}
21 | |
22 | #[derive (Debug, Clone, PartialEq)] |
23 | #[non_exhaustive ] |
24 | pub(crate) enum SyntaxError { |
25 | CannotRedefineXmlnsPrefix, |
26 | CannotRedefineXmlPrefix, |
27 | /// Recursive custom entity expanded to too many chars, it could be DoS |
28 | EntityTooBig, |
29 | EmptyEntity, |
30 | NoRootElement, |
31 | ProcessingInstructionWithoutName, |
32 | UnbalancedRootElement, |
33 | UnexpectedEof, |
34 | UnexpectedOpeningTag, |
35 | /// Missing `]]>` |
36 | UnclosedCdata, |
37 | UnexpectedQualifiedName(Token), |
38 | UnexpectedTokenOutsideRoot(Token), |
39 | UnexpectedToken(Token), |
40 | UnexpectedTokenInEntity(Token), |
41 | UnexpectedTokenInClosingTag(Token), |
42 | UnexpectedTokenInOpeningTag(Token), |
43 | InvalidQualifiedName(Box<str>), |
44 | UnboundAttribute(Box<str>), |
45 | UnboundElementPrefix(Box<str>), |
46 | UnexpectedClosingTag(Box<str>), |
47 | UnexpectedName(Box<str>), |
48 | /// Found <?xml-like PI not at the beginning of a document, |
49 | /// which is an error, see section 2.6 of XML 1.1 spec |
50 | UnexpectedProcessingInstruction(Box<str>, Token), |
51 | CannotUndefinePrefix(Box<str>), |
52 | InvalidCharacterEntity(u32), |
53 | InvalidDefaultNamespace(Box<str>), |
54 | InvalidNamePrefix(Box<str>), |
55 | InvalidNumericEntity(Box<str>), |
56 | InvalidStandaloneDeclaration(Box<str>), |
57 | InvalidXmlProcessingInstruction(Box<str>), |
58 | RedefinedAttribute(Box<str>), |
59 | UndefinedEntity(Box<str>), |
60 | UnexpectedEntity(Box<str>), |
61 | UnexpectedNameInsideXml(Box<str>), |
62 | UnsupportedEncoding(Box<str>), |
63 | /// In DTD |
64 | UnknownMarkupDeclaration(Box<str>), |
65 | UnexpectedXmlVersion(Box<str>), |
66 | ConflictingEncoding(Encoding, Encoding), |
67 | UnexpectedTokenBefore(&'static str, char), |
68 | /// Document has more stuff than `ParserConfig` allows |
69 | ExceededConfiguredLimit, |
70 | } |
71 | |
72 | impl fmt::Display for SyntaxError { |
73 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
74 | self.to_cow().fmt(f) |
75 | } |
76 | } |
77 | |
78 | impl SyntaxError { |
79 | #[inline (never)] |
80 | #[cold ] |
81 | pub(crate) fn to_cow(&self) -> Cow<'static, str> { |
82 | match *self { |
83 | Self::CannotRedefineXmlnsPrefix => "Cannot redefine XMLNS prefix" .into(), |
84 | Self::CannotRedefineXmlPrefix => "Default XMLNS prefix cannot be rebound to another value" .into(), |
85 | Self::EmptyEntity => "Encountered empty entity" .into(), |
86 | Self::EntityTooBig => "Entity too big" .into(), |
87 | Self::NoRootElement => "Unexpected end of stream: no root element found" .into(), |
88 | Self::ProcessingInstructionWithoutName => "Encountered processing instruction without a name" .into(), |
89 | Self::UnbalancedRootElement => "Unexpected end of stream: still inside the root element" .into(), |
90 | Self::UnclosedCdata => "Unclosed <![CDATA[" .into(), |
91 | Self::UnexpectedEof => "Unexpected end of stream" .into(), |
92 | Self::UnexpectedOpeningTag => "'<' is not allowed in attributes" .into(), |
93 | Self::CannotUndefinePrefix(ref ln) => format!("Cannot undefine prefix ' {ln}'" ).into(), |
94 | Self::ConflictingEncoding(a, b) => format!("Declared encoding {a}, but uses {b}" ).into(), |
95 | Self::InvalidCharacterEntity(num) => format!("Invalid character U+ {num:04X}" ).into(), |
96 | Self::InvalidDefaultNamespace(ref name) => format!( "Namespace ' {name}' cannot be default" ).into(), |
97 | Self::InvalidNamePrefix(ref prefix) => format!("' {prefix}' cannot be an element name prefix" ).into(), |
98 | Self::InvalidNumericEntity(ref v) => format!("Invalid numeric entity: {v}" ).into(), |
99 | Self::InvalidQualifiedName(ref e) => format!("Qualified name is invalid: {e}" ).into(), |
100 | Self::InvalidStandaloneDeclaration(ref value) => format!("Invalid standalone declaration value: {value}" ).into(), |
101 | Self::InvalidXmlProcessingInstruction(ref name) => format!("Invalid processing instruction: <? {name} - \"<?xml \"-like PI is only valid at the beginning of the document" ).into(), |
102 | Self::RedefinedAttribute(ref name) => format!("Attribute ' {name}' is redefined" ).into(), |
103 | Self::UnboundAttribute(ref name) => format!("Attribute {name} prefix is unbound" ).into(), |
104 | Self::UnboundElementPrefix(ref name) => format!("Element {name} prefix is unbound" ).into(), |
105 | Self::UndefinedEntity(ref v) => format!("Undefined entity: {v}" ).into(), |
106 | Self::UnexpectedClosingTag(ref expected_got) => format!("Unexpected closing tag: {expected_got}" ).into(), |
107 | Self::UnexpectedEntity(ref name) => format!("Unexpected entity: {name}" ).into(), |
108 | Self::UnexpectedName(ref name) => format!("Unexpected name: {name}" ).into(), |
109 | Self::UnexpectedNameInsideXml(ref name) => format!("Unexpected name inside XML declaration: {name}" ).into(), |
110 | Self::UnexpectedProcessingInstruction(ref buf, token) => format!("Unexpected token inside processing instruction: <? {buf}{token}" ).into(), |
111 | Self::UnexpectedQualifiedName(e) => format!("Unexpected token inside qualified name: {e}" ).into(), |
112 | Self::UnexpectedToken(token) => format!("Unexpected token: {token}" ).into(), |
113 | Self::UnexpectedTokenBefore(before, c) => format!("Unexpected token ' {before}' before ' {c}'" ).into(), |
114 | Self::UnexpectedTokenInClosingTag(token) => format!("Unexpected token inside closing tag: {token}" ).into(), |
115 | Self::UnexpectedTokenInEntity(token) => format!("Unexpected token inside entity: {token}" ).into(), |
116 | Self::UnexpectedTokenInOpeningTag(token) => format!("Unexpected token inside opening tag: {token}" ).into(), |
117 | Self::UnexpectedTokenOutsideRoot(token) => format!("Unexpected characters outside the root element: {token}" ).into(), |
118 | Self::UnexpectedXmlVersion(ref version) => format!("Invalid XML version: {version}" ).into(), |
119 | Self::UnknownMarkupDeclaration(ref v) => format!("Unknown markup declaration: {v}" ).into(), |
120 | Self::UnsupportedEncoding(ref v) => format!("Unsupported encoding: {v}" ).into(), |
121 | Self::ExceededConfiguredLimit => "This document is larger/more complex than allowed by the parser's configuration" .into(), |
122 | } |
123 | } |
124 | } |
125 | |
126 | /// An XML parsing error. |
127 | /// |
128 | /// Consists of a 2D position in a document and a textual message describing the error. |
129 | #[derive (Clone, PartialEq, Eq, Debug)] |
130 | pub struct Error { |
131 | pub(crate) pos: TextPosition, |
132 | pub(crate) kind: ErrorKind, |
133 | } |
134 | |
135 | impl fmt::Display for Error { |
136 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
137 | use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8}; |
138 | |
139 | write!(f, " {} " , self.pos)?; |
140 | match &self.kind { |
141 | Io(io_error: &Error) => io_error.fmt(f), |
142 | Utf8(reason: &Utf8Error) => reason.fmt(f), |
143 | Syntax(msg: &Cow<'_, str>) => f.write_str(data:msg), |
144 | UnexpectedEof => f.write_str(data:"Unexpected EOF" ), |
145 | } |
146 | } |
147 | } |
148 | |
149 | impl Position for Error { |
150 | #[inline ] |
151 | fn position(&self) -> TextPosition { self.pos } |
152 | } |
153 | |
154 | impl Error { |
155 | /// Returns a reference to a message which is contained inside this error. |
156 | #[cold ] |
157 | #[doc (hidden)] |
158 | #[allow (deprecated)] |
159 | #[must_use ] pub fn msg(&self) -> &str { |
160 | use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8}; |
161 | match &self.kind { |
162 | Io(io_error: &Error) => io_error.description(), |
163 | Utf8(reason: &Utf8Error) => reason.description(), |
164 | Syntax(msg: &Cow<'_, str>) => msg.as_ref(), |
165 | UnexpectedEof => "Unexpected EOF" , |
166 | } |
167 | } |
168 | |
169 | #[must_use ] |
170 | #[inline ] |
171 | pub fn kind(&self) -> &ErrorKind { |
172 | &self.kind |
173 | } |
174 | } |
175 | |
176 | impl error::Error for Error { |
177 | #[allow (deprecated)] |
178 | #[cold ] |
179 | fn description(&self) -> &str { self.msg() } |
180 | } |
181 | |
182 | impl<'a, P, M> From<(&'a P, M)> for Error where P: Position, M: Into<Cow<'static, str>> { |
183 | #[cold ] |
184 | fn from(orig: (&'a P, M)) -> Self { |
185 | Error { |
186 | pos: orig.0.position(), |
187 | kind: ErrorKind::Syntax(orig.1.into()), |
188 | } |
189 | } |
190 | } |
191 | |
192 | impl From<util::CharReadError> for Error { |
193 | #[cold ] |
194 | fn from(e: util::CharReadError) -> Self { |
195 | use crate::util::CharReadError::{Io, UnexpectedEof, Utf8}; |
196 | Error { |
197 | pos: TextPosition::new(), |
198 | kind: match e { |
199 | UnexpectedEof => ErrorKind::UnexpectedEof, |
200 | Utf8(reason: Utf8Error) => ErrorKind::Utf8(reason), |
201 | Io(io_error: Error) => ErrorKind::Io(io_error), |
202 | }, |
203 | } |
204 | } |
205 | } |
206 | |
207 | impl From<io::Error> for Error { |
208 | #[cold ] |
209 | fn from(e: io::Error) -> Self { |
210 | Error { |
211 | pos: TextPosition::new(), |
212 | kind: ErrorKind::Io(e), |
213 | } |
214 | } |
215 | } |
216 | |
217 | impl Clone for ErrorKind { |
218 | #[cold ] |
219 | fn clone(&self) -> Self { |
220 | use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8}; |
221 | match self { |
222 | UnexpectedEof => UnexpectedEof, |
223 | Utf8(reason: &Utf8Error) => Utf8(*reason), |
224 | Io(io_error: &Error) => Io(io::Error::new(io_error.kind(), error:io_error.to_string())), |
225 | Syntax(msg: &Cow<'_, str>) => Syntax(msg.clone()), |
226 | } |
227 | } |
228 | } |
229 | impl PartialEq for ErrorKind { |
230 | #[allow (deprecated)] |
231 | fn eq(&self, other: &ErrorKind) -> bool { |
232 | use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8}; |
233 | match (self, other) { |
234 | (UnexpectedEof, UnexpectedEof) => true, |
235 | (Utf8(left: &Utf8Error), Utf8(right: &Utf8Error)) => left == right, |
236 | (Io(left: &Error), Io(right: &Error)) => |
237 | left.kind() == right.kind() && |
238 | left.description() == right.description(), |
239 | (Syntax(left: &Cow<'_, str>), Syntax(right: &Cow<'_, str>)) => |
240 | left == right, |
241 | |
242 | (_, _) => false, |
243 | } |
244 | } |
245 | } |
246 | impl Eq for ErrorKind {} |
247 | |
/// Checks that `SyntaxError` occupies at most 24 bytes.
#[test]
fn err_size() {
    assert!(std::mem::size_of::<SyntaxError>() <= 24);
}
252 | |