| 1 | use crate::common::{is_name_char, is_name_start_char, is_whitespace_char}; |
| 2 | use crate::reader::error::SyntaxError; |
| 3 | |
| 4 | use crate::reader::events::XmlEvent; |
| 5 | use crate::reader::lexer::Token; |
| 6 | |
| 7 | use super::{DeclarationSubstate, Encountered, ProcessingInstructionSubstate, PullParser, Result, State}; |
| 8 | |
| 9 | impl PullParser { |
| 10 | pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> { |
| 11 | match s { |
| 12 | ProcessingInstructionSubstate::PIInsideName => match t { |
| 13 | Token::Character(c) if self.buf.is_empty() && is_name_start_char(c) || |
| 14 | self.buf_has_data() && is_name_char(c) => { |
| 15 | if self.buf.len() > self.config.max_name_length { |
| 16 | return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
| 17 | } |
| 18 | self.buf.push(c); |
| 19 | None |
| 20 | }, |
| 21 | |
| 22 | Token::ProcessingInstructionEnd => { |
| 23 | // self.buf contains PI name |
| 24 | let name = self.take_buf(); |
| 25 | |
| 26 | // Don't need to check for declaration because it has mandatory attributes |
| 27 | // but there is none |
| 28 | match &*name { |
| 29 | // Name is empty, it is an error |
| 30 | "" => Some(self.error(SyntaxError::ProcessingInstructionWithoutName)), |
| 31 | |
| 32 | // Found <?xml-like PI not at the beginning of a document, |
| 33 | // it is an error - see section 2.6 of XML 1.1 spec |
| 34 | n if "xml" .eq_ignore_ascii_case(n) => |
| 35 | Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))), |
| 36 | |
| 37 | // All is ok, emitting event |
| 38 | _ => { |
| 39 | debug_assert!(self.next_event.is_none(), " {:?}" , self.next_event); |
| 40 | // can't have a PI before `<?xml` |
| 41 | let event1 = self.set_encountered(Encountered::Declaration); |
| 42 | let event2 = Some(Ok(XmlEvent::ProcessingInstruction { |
| 43 | name, |
| 44 | data: None |
| 45 | })); |
| 46 | // emitting two events at once is cumbersome |
| 47 | let event1 = if event1.is_some() { |
| 48 | self.next_event = event2; |
| 49 | event1 |
| 50 | } else { |
| 51 | event2 |
| 52 | }; |
| 53 | self.into_state(State::OutsideTag, event1) |
| 54 | }, |
| 55 | } |
| 56 | }, |
| 57 | |
| 58 | Token::Character(c) if is_whitespace_char(c) => { |
| 59 | // self.buf contains PI name |
| 60 | let name = self.take_buf(); |
| 61 | |
| 62 | match &*name { |
| 63 | // We have not ever encountered an element and have not parsed XML declaration |
| 64 | "xml" if self.encountered == Encountered::None => |
| 65 | self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)), |
| 66 | |
| 67 | // Found <?xml-like PI after the beginning of a document, |
| 68 | // it is an error - see section 2.6 of XML 1.1 spec |
| 69 | n if "xml" .eq_ignore_ascii_case(n) => |
| 70 | Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))), |
| 71 | |
| 72 | // All is ok, starting parsing PI data |
| 73 | _ => { |
| 74 | self.data.name = name; |
| 75 | // can't have a PI before `<?xml` |
| 76 | let next_event = self.set_encountered(Encountered::Declaration); |
| 77 | self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event) |
| 78 | }, |
| 79 | } |
| 80 | }, |
| 81 | |
| 82 | _ => { |
| 83 | let buf = self.take_buf(); |
| 84 | Some(self.error(SyntaxError::UnexpectedProcessingInstruction(buf.into(), t))) |
| 85 | }, |
| 86 | }, |
| 87 | |
| 88 | ProcessingInstructionSubstate::PIInsideData => match t { |
| 89 | Token::ProcessingInstructionEnd => { |
| 90 | let name = self.data.take_name(); |
| 91 | let data = self.take_buf(); |
| 92 | self.into_state_emit( |
| 93 | State::OutsideTag, |
| 94 | Ok(XmlEvent::ProcessingInstruction { name, data: Some(data) }), |
| 95 | ) |
| 96 | }, |
| 97 | |
| 98 | Token::Character(c) if !self.is_valid_xml_char(c) => { |
| 99 | Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) |
| 100 | }, |
| 101 | |
| 102 | // Any other token should be treated as plain characters |
| 103 | _ => { |
| 104 | if self.buf.len() > self.config.max_data_length { |
| 105 | return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
| 106 | } |
| 107 | t.push_to_string(&mut self.buf); |
| 108 | None |
| 109 | }, |
| 110 | }, |
| 111 | } |
| 112 | } |
| 113 | } |
| 114 | |