| 1 | use crate::reader::lexer::Token; |
| 2 | use crate::Encoding; |
| 3 | |
| 4 | use std::borrow::Cow; |
| 5 | use std::error::Error as _; |
| 6 | use std::{error, fmt, io, str}; |
| 7 | |
| 8 | use crate::common::{Position, TextPosition}; |
| 9 | use crate::util; |
| 10 | |
/// Failure reason
///
/// Returned by reference from [`Error::kind`].
// `Clone`, `PartialEq` and `Eq` are implemented by hand further down in this
// file rather than derived, because the `Io` variant wraps `io::Error`.
#[derive(Debug)]
pub enum ErrorKind {
    /// This is an ill-formed XML document
    Syntax(Cow<'static, str>),
    /// Reader/writer reported an error
    Io(io::Error),
    /// The document contains bytes that are not allowed in UTF-8 strings
    Utf8(str::Utf8Error),
    /// The document ended while there were elements/comments/etc. still open
    UnexpectedEof,
}
| 23 | |
/// Crate-internal list of syntax problems.
///
/// Each variant is turned into its human-readable message by
/// `SyntaxError::to_cow`; variants carrying data interpolate it into that message.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub(crate) enum SyntaxError {
    CannotRedefineXmlnsPrefix,
    CannotRedefineXmlPrefix,
    /// Recursive custom entity expanded to too many chars, it could be DoS
    EntityTooBig,
    EmptyEntity,
    /// The stream ended and no root element was found
    NoRootElement,
    ProcessingInstructionWithoutName,
    /// The stream ended while still inside the root element
    UnbalancedRootElement,
    UnexpectedEof,
    /// `<` was found where it is not allowed (reported as "not allowed in attributes")
    UnexpectedOpeningTag,
    /// Missing `]]>`
    UnclosedCdata,
    UnexpectedQualifiedName(Token),
    UnexpectedTokenOutsideRoot(Token),
    UnexpectedToken(Token),
    UnexpectedTokenInEntity(Token),
    UnexpectedTokenInClosingTag(Token),
    UnexpectedTokenInOpeningTag(Token),
    InvalidQualifiedName(Box<str>),
    UnboundAttribute(Box<str>),
    UnboundElementPrefix(Box<str>),
    /// The payload is appended verbatim to the message (`to_cow` binds it as `expected_got`)
    UnexpectedClosingTag(Box<str>),
    UnexpectedName(Box<str>),
    /// Found <?xml-like PI not at the beginning of a document,
    /// which is an error, see section 2.6 of XML 1.1 spec
    UnexpectedProcessingInstruction(Box<str>, Token),
    CannotUndefinePrefix(Box<str>),
    /// The numeric value is rendered in upper-case hexadecimal, at least four digits wide
    InvalidCharacterEntity(u32),
    InvalidDefaultNamespace(Box<str>),
    InvalidNamePrefix(Box<str>),
    InvalidNumericEntity(Box<str>),
    InvalidStandaloneDeclaration(Box<str>),
    InvalidXmlProcessingInstruction(Box<str>),
    RedefinedAttribute(Box<str>),
    UndefinedEntity(Box<str>),
    UnexpectedEntity(Box<str>),
    UnexpectedNameInsideXml(Box<str>),
    UnsupportedEncoding(Box<str>),
    /// In DTD
    UnknownMarkupDeclaration(Box<str>),
    UnexpectedXmlVersion(Box<str>),
    /// First field is the declared encoding, second is the one reported as being used
    ConflictingEncoding(Encoding, Encoding),
    /// Reported as "unexpected token `.0` before `.1`"
    UnexpectedTokenBefore(&'static str, char),
    /// Document has more stuff than `ParserConfig` allows
    ExceededConfiguredLimit,
}
| 73 | |
| 74 | impl fmt::Display for SyntaxError { |
| 75 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 76 | self.to_cow().fmt(f) |
| 77 | } |
| 78 | } |
| 79 | |
| 80 | impl SyntaxError { |
| 81 | #[inline (never)] |
| 82 | #[cold ] |
| 83 | pub(crate) fn to_cow(&self) -> Cow<'static, str> { |
| 84 | match *self { |
| 85 | Self::CannotRedefineXmlnsPrefix => "Cannot redefine XMLNS prefix" .into(), |
| 86 | Self::CannotRedefineXmlPrefix => "Default XMLNS prefix cannot be rebound to another value" .into(), |
| 87 | Self::EmptyEntity => "Encountered empty entity" .into(), |
| 88 | Self::EntityTooBig => "Entity too big" .into(), |
| 89 | Self::NoRootElement => "Unexpected end of stream: no root element found" .into(), |
| 90 | Self::ProcessingInstructionWithoutName => "Encountered processing instruction without a name" .into(), |
| 91 | Self::UnbalancedRootElement => "Unexpected end of stream: still inside the root element" .into(), |
| 92 | Self::UnclosedCdata => "Unclosed <![CDATA[" .into(), |
| 93 | Self::UnexpectedEof => "Unexpected end of stream" .into(), |
| 94 | Self::UnexpectedOpeningTag => "'<' is not allowed in attributes" .into(), |
| 95 | Self::CannotUndefinePrefix(ref ln) => format!("Cannot undefine prefix ' {ln}'" ).into(), |
| 96 | Self::ConflictingEncoding(a, b) => format!("Declared encoding {a}, but uses {b}" ).into(), |
| 97 | Self::InvalidCharacterEntity(num) => format!("Invalid character U+ {num:04X}" ).into(), |
| 98 | Self::InvalidDefaultNamespace(ref name) => format!("Namespace ' {name}' cannot be default" ).into(), |
| 99 | Self::InvalidNamePrefix(ref prefix) => format!("' {prefix}' cannot be an element name prefix" ).into(), |
| 100 | Self::InvalidNumericEntity(ref v) => format!("Invalid numeric entity: {v}" ).into(), |
| 101 | Self::InvalidQualifiedName(ref e) => format!("Qualified name is invalid: {e}" ).into(), |
| 102 | Self::InvalidStandaloneDeclaration(ref value) => format!("Invalid standalone declaration value: {value}" ).into(), |
| 103 | Self::InvalidXmlProcessingInstruction(ref name) => format!("Invalid processing instruction: <? {name}\nThe XML spec only allows \"<?xml \" at the very beginning of the file, with no whitespace, comments, or any elements before it" ).into(), |
| 104 | Self::RedefinedAttribute(ref name) => format!("Attribute ' {name}' is redefined" ).into(), |
| 105 | Self::UnboundAttribute(ref name) => format!("Attribute {name} prefix is unbound" ).into(), |
| 106 | Self::UnboundElementPrefix(ref name) => format!("Element {name} prefix is unbound" ).into(), |
| 107 | Self::UndefinedEntity(ref v) => format!("Undefined entity: {v}" ).into(), |
| 108 | Self::UnexpectedClosingTag(ref expected_got) => format!("Unexpected closing tag: {expected_got}" ).into(), |
| 109 | Self::UnexpectedEntity(ref name) => format!("Unexpected entity: {name}" ).into(), |
| 110 | Self::UnexpectedName(ref name) => format!("Unexpected name: {name}" ).into(), |
| 111 | Self::UnexpectedNameInsideXml(ref name) => format!("Unexpected name inside XML declaration: {name}" ).into(), |
| 112 | Self::UnexpectedProcessingInstruction(ref buf, token) => format!("Unexpected token inside processing instruction: <? {buf}{token}" ).into(), |
| 113 | Self::UnexpectedQualifiedName(e) => format!("Unexpected token inside qualified name: {e}" ).into(), |
| 114 | Self::UnexpectedToken(token) => format!("Unexpected token: {token}" ).into(), |
| 115 | Self::UnexpectedTokenBefore(before, c) => format!("Unexpected token ' {before}' before ' {c}'" ).into(), |
| 116 | Self::UnexpectedTokenInClosingTag(token) => format!("Unexpected token inside closing tag: {token}" ).into(), |
| 117 | Self::UnexpectedTokenInEntity(token) => format!("Unexpected token inside entity: {token}" ).into(), |
| 118 | Self::UnexpectedTokenInOpeningTag(token) => format!("Unexpected token inside opening tag: {token}" ).into(), |
| 119 | Self::UnexpectedTokenOutsideRoot(token) => format!("Unexpected characters outside the root element: {token}" ).into(), |
| 120 | Self::UnexpectedXmlVersion(ref version) => format!("Invalid XML version: {version}" ).into(), |
| 121 | Self::UnknownMarkupDeclaration(ref v) => format!("Unknown markup declaration: {v}" ).into(), |
| 122 | Self::UnsupportedEncoding(ref v) => format!("Unsupported encoding: {v}" ).into(), |
| 123 | Self::ExceededConfiguredLimit => "This document is larger/more complex than allowed by the parser's configuration" .into(), |
| 124 | } |
| 125 | } |
| 126 | } |
| 127 | |
/// An XML parsing error.
///
/// Consists of a 2D position in a document and a textual message describing the error.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Error {
    /// Position attached to the error. The conversions from `io::Error` and
    /// `util::CharReadError` in this file fill it with `TextPosition::new()`.
    pub(crate) pos: TextPosition,
    /// What went wrong; exposed through [`Error::kind`].
    pub(crate) kind: ErrorKind,
}
| 136 | |
| 137 | impl fmt::Display for Error { |
| 138 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 139 | use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8}; |
| 140 | |
| 141 | write!(f, " {} " , self.pos)?; |
| 142 | match &self.kind { |
| 143 | Io(io_error: &Error) => io_error.fmt(f), |
| 144 | Utf8(reason: &Utf8Error) => reason.fmt(f), |
| 145 | Syntax(msg: &Cow<'static, str>) => f.write_str(data:msg), |
| 146 | UnexpectedEof => f.write_str(data:"Unexpected EOF" ), |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | impl Position for Error { |
| 152 | #[inline ] |
| 153 | fn position(&self) -> TextPosition { self.pos } |
| 154 | } |
| 155 | |
| 156 | impl Error { |
| 157 | /// Returns a reference to a message which is contained inside this error. |
| 158 | #[cold ] |
| 159 | #[doc (hidden)] |
| 160 | #[allow (deprecated)] |
| 161 | #[must_use ] |
| 162 | pub fn msg(&self) -> &str { |
| 163 | use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8}; |
| 164 | match &self.kind { |
| 165 | Io(io_error: &Error) => io_error.description(), |
| 166 | Utf8(reason: &Utf8Error) => reason.description(), |
| 167 | Syntax(msg: &Cow<'static, str>) => msg.as_ref(), |
| 168 | UnexpectedEof => "Unexpected EOF" , |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | /// Failure reason |
| 173 | #[must_use ] |
| 174 | #[inline ] |
| 175 | pub fn kind(&self) -> &ErrorKind { |
| 176 | &self.kind |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | impl error::Error for Error { |
| 181 | #[allow (deprecated)] |
| 182 | #[cold ] |
| 183 | fn description(&self) -> &str { self.msg() } |
| 184 | } |
| 185 | |
| 186 | impl<'a, P, M> From<(&'a P, M)> for Error where P: Position, M: Into<Cow<'static, str>> { |
| 187 | #[cold ] |
| 188 | fn from(orig: (&'a P, M)) -> Self { |
| 189 | Self { |
| 190 | pos: orig.0.position(), |
| 191 | kind: ErrorKind::Syntax(orig.1.into()), |
| 192 | } |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | impl From<util::CharReadError> for Error { |
| 197 | #[cold ] |
| 198 | fn from(e: util::CharReadError) -> Self { |
| 199 | use crate::util::CharReadError::{Io, UnexpectedEof, Utf8}; |
| 200 | Self { |
| 201 | pos: TextPosition::new(), |
| 202 | kind: match e { |
| 203 | UnexpectedEof => ErrorKind::UnexpectedEof, |
| 204 | Utf8(reason: Utf8Error) => ErrorKind::Utf8(reason), |
| 205 | Io(io_error: Error) => ErrorKind::Io(io_error), |
| 206 | }, |
| 207 | } |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | impl From<io::Error> for Error { |
| 212 | #[cold ] |
| 213 | fn from(e: io::Error) -> Self { |
| 214 | Self { |
| 215 | pos: TextPosition::new(), |
| 216 | kind: ErrorKind::Io(e), |
| 217 | } |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | impl Clone for ErrorKind { |
| 222 | #[cold ] |
| 223 | fn clone(&self) -> Self { |
| 224 | use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8}; |
| 225 | match self { |
| 226 | UnexpectedEof => UnexpectedEof, |
| 227 | Utf8(reason: &Utf8Error) => Utf8(*reason), |
| 228 | Io(io_error: &Error) => Io(io::Error::new(io_error.kind(), error:io_error.to_string())), |
| 229 | Syntax(msg: &Cow<'static, str>) => Syntax(msg.clone()), |
| 230 | } |
| 231 | } |
| 232 | } |
| 233 | impl PartialEq for ErrorKind { |
| 234 | #[allow (deprecated)] |
| 235 | fn eq(&self, other: &Self) -> bool { |
| 236 | use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8}; |
| 237 | match (self, other) { |
| 238 | (UnexpectedEof, UnexpectedEof) => true, |
| 239 | (Utf8(left: &Utf8Error), Utf8(right: &Utf8Error)) => left == right, |
| 240 | (Io(left: &Error), Io(right: &Error)) => |
| 241 | left.kind() == right.kind() && |
| 242 | left.description() == right.description(), |
| 243 | (Syntax(left: &Cow<'static, str>), Syntax(right: &Cow<'static, str>)) => |
| 244 | left == right, |
| 245 | |
| 246 | (_, _) => false, |
| 247 | } |
| 248 | } |
| 249 | } |
// Marker impl with no methods: equality itself comes from the `PartialEq` impl.
impl Eq for ErrorKind {}
| 251 | |
/// Size guard: fails if `SyntaxError` grows beyond 24 bytes.
// NOTE(review): presumably the limit keeps error values cheap to move around — confirm intent.
#[test]
fn err_size() {
    assert!(std::mem::size_of::<SyntaxError>() <= 24);
}
| 256 | |