1 | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | #![allow (warnings)] |
11 | |
12 | //! The HTML5 tree builder. |
13 | |
14 | pub use crate::interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink}; |
15 | pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText}; |
16 | pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; |
17 | |
18 | use self::types::*; |
19 | |
20 | use crate::tendril::StrTendril; |
21 | use crate::{ExpandedName, LocalName, Namespace, QualName}; |
22 | |
23 | use crate::tokenizer; |
24 | use crate::tokenizer::states as tok_state; |
25 | use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult}; |
26 | |
27 | use std::borrow::Cow::Borrowed; |
28 | use std::collections::VecDeque; |
29 | use std::default::Default; |
30 | use std::iter::{Enumerate, Rev}; |
31 | use std::mem::replace; |
32 | use std::{fmt, slice}; |
33 | |
34 | use crate::tokenizer::states::{RawData, RawKind}; |
35 | use crate::tree_builder::tag_sets::*; |
36 | use crate::tree_builder::types::*; |
37 | use crate::util::str::to_escaped_string; |
38 | use log::{debug, log_enabled, warn, Level}; |
39 | use mac::{_tt_as_expr_hack, format_if, matches}; |
40 | |
41 | pub use self::PushFlag::*; |
42 | |
43 | #[macro_use ] |
44 | mod tag_sets; |
45 | |
46 | mod data; |
47 | mod types; |
48 | |
49 | include!(concat!(env!("OUT_DIR" ), "/rules.rs" )); |
50 | |
/// Tree builder options, with an impl for Default.
///
/// The struct is `Copy`; both `TreeBuilder` constructors take it by value.
#[derive (Copy, Clone)]
pub struct TreeBuilderOpts {
    /// Report all parse errors described in the spec, at some
    /// performance penalty? Default: false
    pub exact_errors: bool,

    /// Is scripting enabled? Default: true
    pub scripting_enabled: bool,

    /// Is this an `iframe srcdoc` document? Default: false
    pub iframe_srcdoc: bool,

    /// Should we drop the DOCTYPE (if any) from the tree? Default: false
    pub drop_doctype: bool,

    /// Obsolete, ignored. Default: false
    pub ignore_missing_rules: bool,

    /// Initial TreeBuilder quirks mode. Default: NoQuirks
    pub quirks_mode: QuirksMode,
}
73 | |
74 | impl Default for TreeBuilderOpts { |
75 | fn default() -> TreeBuilderOpts { |
76 | TreeBuilderOpts { |
77 | exact_errors: false, |
78 | scripting_enabled: true, |
79 | iframe_srcdoc: false, |
80 | drop_doctype: false, |
81 | ignore_missing_rules: false, |
82 | quirks_mode: NoQuirks, |
83 | } |
84 | } |
85 | } |
86 | |
/// The HTML tree builder.
///
/// Holds the parser-side state (insertion modes, the stack of open elements,
/// the list of active formatting elements, ...) and forwards every tree
/// modification to `sink`.
pub struct TreeBuilder<Handle, Sink> {
    /// Options controlling the behavior of the tree builder.
    /// Copied from the value passed to the constructor.
    opts: TreeBuilderOpts,

    /// Consumer of tree modifications.
    pub sink: Sink,

    /// Insertion mode. Both constructors start in `Initial`; the fragment
    /// constructor then replaces it with the result of `reset_insertion_mode`.
    mode: InsertionMode,

    /// Original insertion mode, used by Text and InTableText modes.
    /// `to_raw_text_mode` stores the mode that was current before it
    /// switched to `Text`.
    orig_mode: Option<InsertionMode>,

    /// Stack of template insertion modes. Seeded with `InTemplate` when a
    /// fragment is parsed with a `template` context element.
    template_modes: Vec<InsertionMode>,

    /// Pending table character tokens.
    pending_table_text: Vec<(SplitStatus, StrTendril)>,

    /// Quirks mode as set by the parser. Initialised from
    /// `opts.quirks_mode` and updated through `set_quirks_mode`.
    /// FIXME: can scripts etc. change this?
    quirks_mode: QuirksMode,

    /// The document node, which is created by the sink
    /// (obtained from `sink.get_document()` in the constructors).
    doc_handle: Handle,

    /// Stack of open elements, most recently added at end.
    open_elems: Vec<Handle>,

    /// List of active formatting elements.
    active_formatting: Vec<FormatEntry<Handle>>,

    //§ the-element-pointers
    /// Head element pointer.
    head_elem: Option<Handle>,

    /// Form element pointer. The fragment constructor takes its initial
    /// value from the caller; the document constructor starts with `None`.
    form_elem: Option<Handle>,
    //§ END
    /// Frameset-ok flag. Starts out `true`.
    frameset_ok: bool,

    /// Ignore a following U+000A LINE FEED?
    /// Read and reset to `false` at the start of every `process_token` call.
    ignore_lf: bool,

    /// Is foster parenting enabled?
    foster_parenting: bool,

    /// The context element for the fragment parsing algorithm.
    /// `Some` only when the builder was created by `new_for_fragment`.
    context_elem: Option<Handle>,

    /// Track current line. Starts at 1 and is compared against the line
    /// number passed to `process_token`.
    current_line: u64,
    // WARNING: If you add new fields that contain Handles, you
    // must add them to trace_handles() below to preserve memory
    // safety!
    //
    // FIXME: Auto-generate the trace hooks like Servo does.
}
147 | |
148 | impl<Handle, Sink> TreeBuilder<Handle, Sink> |
149 | where |
150 | Handle: Clone, |
151 | Sink: TreeSink<Handle = Handle>, |
152 | { |
153 | /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. |
154 | /// |
155 | /// The tree builder is also a `TokenSink`. |
156 | pub fn new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> { |
157 | let doc_handle = sink.get_document(); |
158 | TreeBuilder { |
159 | opts: opts, |
160 | sink: sink, |
161 | mode: Initial, |
162 | orig_mode: None, |
163 | template_modes: vec![], |
164 | pending_table_text: vec![], |
165 | quirks_mode: opts.quirks_mode, |
166 | doc_handle: doc_handle, |
167 | open_elems: vec![], |
168 | active_formatting: vec![], |
169 | head_elem: None, |
170 | form_elem: None, |
171 | frameset_ok: true, |
172 | ignore_lf: false, |
173 | foster_parenting: false, |
174 | context_elem: None, |
175 | current_line: 1, |
176 | } |
177 | } |
178 | |
179 | /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. |
180 | /// This is for parsing fragments. |
181 | /// |
182 | /// The tree builder is also a `TokenSink`. |
183 | pub fn new_for_fragment( |
184 | mut sink: Sink, |
185 | context_elem: Handle, |
186 | form_elem: Option<Handle>, |
187 | opts: TreeBuilderOpts, |
188 | ) -> TreeBuilder<Handle, Sink> { |
189 | let doc_handle = sink.get_document(); |
190 | let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template" ); |
191 | let mut tb = TreeBuilder { |
192 | opts: opts, |
193 | sink: sink, |
194 | mode: Initial, |
195 | orig_mode: None, |
196 | template_modes: if context_is_template { |
197 | vec![InTemplate] |
198 | } else { |
199 | vec![] |
200 | }, |
201 | pending_table_text: vec![], |
202 | quirks_mode: opts.quirks_mode, |
203 | doc_handle: doc_handle, |
204 | open_elems: vec![], |
205 | active_formatting: vec![], |
206 | head_elem: None, |
207 | form_elem: form_elem, |
208 | frameset_ok: true, |
209 | ignore_lf: false, |
210 | foster_parenting: false, |
211 | context_elem: Some(context_elem), |
212 | current_line: 1, |
213 | }; |
214 | |
215 | // https://html.spec.whatwg.org/multipage/#parsing-html-fragments |
216 | // 5. Let root be a new html element with no attributes. |
217 | // 6. Append the element root to the Document node created above. |
218 | // 7. Set up the parser's stack of open elements so that it contains just the single element root. |
219 | tb.create_root(vec![]); |
220 | // 10. Reset the parser's insertion mode appropriately. |
221 | tb.mode = tb.reset_insertion_mode(); |
222 | |
223 | tb |
224 | } |
225 | |
226 | // https://html.spec.whatwg.org/multipage/#concept-frag-parse-context |
227 | // Step 4. Set the state of the HTML parser's tokenization stage as follows: |
228 | pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State { |
229 | let elem = self.context_elem.as_ref().expect("no context element" ); |
230 | let name = match self.sink.elem_name(elem) { |
231 | ExpandedName { |
232 | ns: &ns!(html), |
233 | local, |
234 | } => local, |
235 | _ => return tok_state::Data, |
236 | }; |
237 | match *name { |
238 | local_name!("title" ) | local_name!("textarea" ) => tok_state::RawData(tok_state::Rcdata), |
239 | |
240 | local_name!("style" ) | |
241 | local_name!("xmp" ) | |
242 | local_name!("iframe" ) | |
243 | local_name!("noembed" ) | |
244 | local_name!("noframes" ) => tok_state::RawData(tok_state::Rawtext), |
245 | |
246 | local_name!("script" ) => tok_state::RawData(tok_state::ScriptData), |
247 | |
248 | local_name!("noscript" ) => { |
249 | if self.opts.scripting_enabled { |
250 | tok_state::RawData(tok_state::Rawtext) |
251 | } else { |
252 | tok_state::Data |
253 | } |
254 | }, |
255 | |
256 | local_name!("plaintext" ) => tok_state::Plaintext, |
257 | |
258 | _ => tok_state::Data, |
259 | } |
260 | } |
261 | |
262 | /// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's |
263 | /// internal state. This is intended to support garbage-collected DOMs. |
264 | pub fn trace_handles(&self, tracer: &Tracer<Handle = Handle>) { |
265 | tracer.trace_handle(&self.doc_handle); |
266 | for e in &self.open_elems { |
267 | tracer.trace_handle(e); |
268 | } |
269 | for e in &self.active_formatting { |
270 | match e { |
271 | &Element(ref h, _) => tracer.trace_handle(h), |
272 | _ => (), |
273 | } |
274 | } |
275 | self.head_elem.as_ref().map(|h| tracer.trace_handle(h)); |
276 | self.form_elem.as_ref().map(|h| tracer.trace_handle(h)); |
277 | self.context_elem.as_ref().map(|h| tracer.trace_handle(h)); |
278 | } |
279 | |
280 | #[allow (dead_code)] |
281 | fn dump_state(&self, label: String) { |
282 | println!("dump_state on {}" , label); |
283 | print!(" open_elems:" ); |
284 | for node in self.open_elems.iter() { |
285 | let name = self.sink.elem_name(node); |
286 | match *name.ns { |
287 | ns!(html) => print!(" {}" , name.local), |
288 | _ => panic!(), |
289 | } |
290 | } |
291 | println!("" ); |
292 | print!(" active_formatting:" ); |
293 | for entry in self.active_formatting.iter() { |
294 | match entry { |
295 | &Marker => print!(" Marker" ), |
296 | &Element(ref h, _) => { |
297 | let name = self.sink.elem_name(h); |
298 | match *name.ns { |
299 | ns!(html) => print!(" {}" , name.local), |
300 | _ => panic!(), |
301 | } |
302 | }, |
303 | } |
304 | } |
305 | println!("" ); |
306 | } |
307 | |
308 | fn debug_step(&self, mode: InsertionMode, token: &Token) { |
309 | if log_enabled!(Level::Debug) { |
310 | debug!( |
311 | "processing {} in insertion mode {:?}" , |
312 | to_escaped_string(token), |
313 | mode |
314 | ); |
315 | } |
316 | } |
317 | |
318 | fn process_to_completion(&mut self, mut token: Token) -> TokenSinkResult<Handle> { |
319 | // Queue of additional tokens yet to be processed. |
320 | // This stays empty in the common case where we don't split whitespace. |
321 | let mut more_tokens = VecDeque::new(); |
322 | |
323 | loop { |
324 | let should_have_acknowledged_self_closing_flag = matches!( |
325 | token, |
326 | TagToken(Tag { |
327 | self_closing: true, |
328 | kind: StartTag, |
329 | .. |
330 | }) |
331 | ); |
332 | let result = if self.is_foreign(&token) { |
333 | self.step_foreign(token) |
334 | } else { |
335 | let mode = self.mode; |
336 | self.step(mode, token) |
337 | }; |
338 | match result { |
339 | Done => { |
340 | if should_have_acknowledged_self_closing_flag { |
341 | self.sink |
342 | .parse_error(Borrowed("Unacknowledged self-closing tag" )); |
343 | } |
344 | token = unwrap_or_return!( |
345 | more_tokens.pop_front(), |
346 | tokenizer::TokenSinkResult::Continue |
347 | ); |
348 | }, |
349 | DoneAckSelfClosing => { |
350 | token = unwrap_or_return!( |
351 | more_tokens.pop_front(), |
352 | tokenizer::TokenSinkResult::Continue |
353 | ); |
354 | }, |
355 | Reprocess(m, t) => { |
356 | self.mode = m; |
357 | token = t; |
358 | }, |
359 | ReprocessForeign(t) => { |
360 | token = t; |
361 | }, |
362 | SplitWhitespace(mut buf) => { |
363 | let p = buf.pop_front_char_run(|c| c.is_ascii_whitespace()); |
364 | let (first, is_ws) = unwrap_or_return!(p, tokenizer::TokenSinkResult::Continue); |
365 | let status = if is_ws { Whitespace } else { NotWhitespace }; |
366 | token = CharacterTokens(status, first); |
367 | |
368 | if buf.len32() > 0 { |
369 | more_tokens.push_back(CharacterTokens(NotSplit, buf)); |
370 | } |
371 | }, |
372 | Script(node) => { |
373 | assert!(more_tokens.is_empty()); |
374 | return tokenizer::TokenSinkResult::Script(node); |
375 | }, |
376 | ToPlaintext => { |
377 | assert!(more_tokens.is_empty()); |
378 | return tokenizer::TokenSinkResult::Plaintext; |
379 | }, |
380 | ToRawData(k) => { |
381 | assert!(more_tokens.is_empty()); |
382 | return tokenizer::TokenSinkResult::RawData(k); |
383 | }, |
384 | } |
385 | } |
386 | } |
387 | |
388 | /// Are we parsing a HTML fragment? |
389 | pub fn is_fragment(&self) -> bool { |
390 | self.context_elem.is_some() |
391 | } |
392 | |
393 | /// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node |
394 | fn appropriate_place_for_insertion( |
395 | &mut self, |
396 | override_target: Option<Handle>, |
397 | ) -> InsertionPoint<Handle> { |
398 | use self::tag_sets::*; |
399 | |
400 | declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr" ); |
401 | let target = override_target.unwrap_or_else(|| self.current_node().clone()); |
402 | if !(self.foster_parenting && self.elem_in(&target, foster_target)) { |
403 | if self.html_elem_named(&target, local_name!("template" )) { |
404 | // No foster parenting (inside template). |
405 | let contents = self.sink.get_template_contents(&target); |
406 | return LastChild(contents); |
407 | } else { |
408 | // No foster parenting (the common case). |
409 | return LastChild(target); |
410 | } |
411 | } |
412 | |
413 | // Foster parenting |
414 | let mut iter = self.open_elems.iter().rev().peekable(); |
415 | while let Some(elem) = iter.next() { |
416 | if self.html_elem_named(&elem, local_name!("template" )) { |
417 | let contents = self.sink.get_template_contents(&elem); |
418 | return LastChild(contents); |
419 | } else if self.html_elem_named(&elem, local_name!("table" )) { |
420 | return TableFosterParenting { |
421 | element: elem.clone(), |
422 | prev_element: (*iter.peek().unwrap()).clone(), |
423 | }; |
424 | } |
425 | } |
426 | let html_elem = self.html_elem(); |
427 | LastChild(html_elem.clone()) |
428 | } |
429 | |
430 | fn insert_at(&mut self, insertion_point: InsertionPoint<Handle>, child: NodeOrText<Handle>) { |
431 | match insertion_point { |
432 | LastChild(parent) => self.sink.append(&parent, child), |
433 | BeforeSibling(sibling) => self.sink.append_before_sibling(&sibling, child), |
434 | TableFosterParenting { |
435 | element, |
436 | prev_element, |
437 | } => self |
438 | .sink |
439 | .append_based_on_parent_node(&element, &prev_element, child), |
440 | } |
441 | } |
442 | } |
443 | |
444 | impl<Handle, Sink> TokenSink for TreeBuilder<Handle, Sink> |
445 | where |
446 | Handle: Clone, |
447 | Sink: TreeSink<Handle = Handle>, |
448 | { |
449 | type Handle = Handle; |
450 | |
451 | fn process_token( |
452 | &mut self, |
453 | token: tokenizer::Token, |
454 | line_number: u64, |
455 | ) -> TokenSinkResult<Handle> { |
456 | if line_number != self.current_line { |
457 | self.sink.set_current_line(line_number); |
458 | } |
459 | let ignore_lf = replace(&mut self.ignore_lf, false); |
460 | |
461 | // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type. |
462 | let token = match token { |
463 | tokenizer::ParseError(e) => { |
464 | self.sink.parse_error(e); |
465 | return tokenizer::TokenSinkResult::Continue; |
466 | }, |
467 | |
468 | tokenizer::DoctypeToken(dt) => { |
469 | if self.mode == Initial { |
470 | let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc); |
471 | if err { |
472 | self.sink.parse_error(format_if!( |
473 | self.opts.exact_errors, |
474 | "Bad DOCTYPE" , |
475 | "Bad DOCTYPE: {:?}" , |
476 | dt |
477 | )); |
478 | } |
479 | let Doctype { |
480 | name, |
481 | public_id, |
482 | system_id, |
483 | force_quirks: _, |
484 | } = dt; |
485 | if !self.opts.drop_doctype { |
486 | self.sink.append_doctype_to_document( |
487 | name.unwrap_or(StrTendril::new()), |
488 | public_id.unwrap_or(StrTendril::new()), |
489 | system_id.unwrap_or(StrTendril::new()), |
490 | ); |
491 | } |
492 | self.set_quirks_mode(quirk); |
493 | |
494 | self.mode = BeforeHtml; |
495 | return tokenizer::TokenSinkResult::Continue; |
496 | } else { |
497 | self.sink.parse_error(format_if!( |
498 | self.opts.exact_errors, |
499 | "DOCTYPE in body" , |
500 | "DOCTYPE in insertion mode {:?}" , |
501 | self.mode |
502 | )); |
503 | return tokenizer::TokenSinkResult::Continue; |
504 | } |
505 | }, |
506 | |
507 | tokenizer::TagToken(x) => TagToken(x), |
508 | tokenizer::CommentToken(x) => CommentToken(x), |
509 | tokenizer::NullCharacterToken => NullCharacterToken, |
510 | tokenizer::EOFToken => EOFToken, |
511 | |
512 | tokenizer::CharacterTokens(mut x) => { |
513 | if ignore_lf && x.starts_with(" \n" ) { |
514 | x.pop_front(1); |
515 | } |
516 | if x.is_empty() { |
517 | return tokenizer::TokenSinkResult::Continue; |
518 | } |
519 | CharacterTokens(NotSplit, x) |
520 | }, |
521 | }; |
522 | |
523 | self.process_to_completion(token) |
524 | } |
525 | |
526 | fn end(&mut self) { |
527 | for elem in self.open_elems.drain(..).rev() { |
528 | self.sink.pop(&elem); |
529 | } |
530 | } |
531 | |
532 | fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool { |
533 | !self.open_elems.is_empty() && |
534 | self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html) |
535 | } |
536 | } |
537 | |
/// Return the first (bottom-most) entry of a stack of open elements.
///
/// # Panics
///
/// Panics if `open_elems` is empty.
pub fn html_elem<Handle>(open_elems: &[Handle]) -> &Handle {
    let root = &open_elems[0];
    root
}
541 | |
542 | pub struct ActiveFormattingIter<'a, Handle: 'a> { |
543 | iter: Rev<Enumerate<slice::Iter<'a, FormatEntry<Handle>>>>, |
544 | } |
545 | |
546 | impl<'a, Handle> Iterator for ActiveFormattingIter<'a, Handle> { |
547 | type Item = (usize, &'a Handle, &'a Tag); |
548 | fn next(&mut self) -> Option<(usize, &'a Handle, &'a Tag)> { |
549 | match self.iter.next() { |
550 | None | Some((_, &Marker)) => None, |
551 | Some((i: usize, &Element(ref h: &Handle, ref t: &Tag))) => Some((i, h, t)), |
552 | } |
553 | } |
554 | } |
555 | |
/// Flag passed to element-insertion helpers such as `insert_element`
/// (see `reconstruct_formatting`, which passes `Push`).
// NOTE(review): presumably selects whether the new element is also pushed
// onto the stack of open elements; confirm against `insert_element`.
pub enum PushFlag {
    Push,
    NoPush,
}
560 | |
/// Position in the list of active formatting elements, tracked by
/// `adoption_agency` so it knows where to put the new formatting entry.
enum Bookmark<Handle> {
    /// Overwrite the list entry that holds this handle.
    Replace(Handle),
    /// Insert directly after the list entry that holds this handle.
    InsertAfter(Handle),
}
565 | |
/// Build a `QualName` from literal parts.
///
/// * `qualname!("", local)` gives a name with no prefix and the empty namespace.
/// * `qualname!(prefix ns local)` gives a prefixed name in the given namespace.
///
/// The arms are tried in order, so the unprefixed form must stay first.
macro_rules! qualname {
    // Unprefixed name in the empty namespace.
    ("" , $local:tt) => {
        QualName {
            prefix: None,
            ns: ns!(),
            local: local_name!($local),
        }
    };
    // Prefixed name: three token trees, not separated by commas.
    ($prefix: tt $ns:tt $local:tt) => {
        QualName {
            prefix: Some(namespace_prefix!($prefix)),
            ns: ns!($ns),
            local: local_name!($local),
        }
    };
}
582 | |
583 | #[doc (hidden)] |
584 | impl<Handle, Sink> TreeBuilder<Handle, Sink> |
585 | where |
586 | Handle: Clone, |
587 | Sink: TreeSink<Handle = Handle>, |
588 | { |
589 | fn unexpected<T: fmt::Debug>(&mut self, _thing: &T) -> ProcessResult<Handle> { |
590 | self.sink.parse_error(format_if!( |
591 | self.opts.exact_errors, |
592 | "Unexpected token" , |
593 | "Unexpected token {} in insertion mode {:?}" , |
594 | to_escaped_string(_thing), |
595 | self.mode |
596 | )); |
597 | Done |
598 | } |
599 | |
/// Panic unless `node` is an HTML-namespace element with local name `name`.
fn assert_named(&mut self, node: &Handle, name: LocalName) {
    assert!(self.html_elem_named(&node, name));
}
603 | |
604 | /// Iterate over the active formatting elements (with index in the list) from the end |
605 | /// to the last marker, or the beginning if there are no markers. |
606 | fn active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingIter<'a, Handle> { |
607 | ActiveFormattingIter { |
608 | iter: self.active_formatting.iter().enumerate().rev(), |
609 | } |
610 | } |
611 | |
612 | fn position_in_active_formatting(&self, element: &Handle) -> Option<usize> { |
613 | self.active_formatting.iter().position(|n| match n { |
614 | &Marker => false, |
615 | &Element(ref handle, _) => self.sink.same_node(handle, element), |
616 | }) |
617 | } |
618 | |
619 | fn set_quirks_mode(&mut self, mode: QuirksMode) { |
620 | self.quirks_mode = mode; |
621 | self.sink.set_quirks_mode(mode); |
622 | } |
623 | |
/// Stop parsing. Currently this performs no work of its own and simply
/// reports `Done` to the caller.
fn stop_parsing(&mut self) -> ProcessResult<Handle> {
    Done
}
627 | |
628 | //§ parsing-elements-that-contain-only-text |
629 | // Switch to `Text` insertion mode, save the old mode, and |
630 | // switch the tokenizer to a raw-data state. |
631 | // The latter only takes effect after the current / next |
632 | // `process_token` of a start tag returns! |
633 | fn to_raw_text_mode(&mut self, k: RawKind) -> ProcessResult<Handle> { |
634 | self.orig_mode = Some(self.mode); |
635 | self.mode = Text; |
636 | ToRawData(k) |
637 | } |
638 | |
639 | // The generic raw text / RCDATA parsing algorithm. |
640 | fn parse_raw_data(&mut self, tag: Tag, k: RawKind) -> ProcessResult<Handle> { |
641 | self.insert_element_for(tag); |
642 | self.to_raw_text_mode(k) |
643 | } |
644 | //§ END |
645 | |
646 | fn current_node(&self) -> &Handle { |
647 | self.open_elems.last().expect("no current element" ) |
648 | } |
649 | |
650 | fn adjusted_current_node(&self) -> &Handle { |
651 | if self.open_elems.len() == 1 { |
652 | if let Some(ctx) = self.context_elem.as_ref() { |
653 | return ctx; |
654 | } |
655 | } |
656 | self.current_node() |
657 | } |
658 | |
659 | fn current_node_in<TagSet>(&self, set: TagSet) -> bool |
660 | where |
661 | TagSet: Fn(ExpandedName) -> bool, |
662 | { |
663 | set(self.sink.elem_name(self.current_node())) |
664 | } |
665 | |
666 | // Insert at the "appropriate place for inserting a node". |
667 | fn insert_appropriately(&mut self, child: NodeOrText<Handle>, override_target: Option<Handle>) { |
668 | let insertion_point = self.appropriate_place_for_insertion(override_target); |
669 | self.insert_at(insertion_point, child); |
670 | } |
671 | |
672 | fn adoption_agency(&mut self, subject: LocalName) { |
673 | // 1. |
674 | if self.current_node_named(subject.clone()) { |
675 | if self |
676 | .position_in_active_formatting(self.current_node()) |
677 | .is_none() |
678 | { |
679 | self.pop(); |
680 | return; |
681 | } |
682 | } |
683 | |
684 | // 2. 3. 4. |
685 | for _ in 0..8 { |
686 | // 5. |
687 | let (fmt_elem_index, fmt_elem, fmt_elem_tag) = unwrap_or_return!( |
688 | // We clone the Handle and Tag so they don't cause an immutable borrow of self. |
689 | self.active_formatting_end_to_marker() |
690 | .filter(|&(_, _, tag)| tag.name == subject) |
691 | .next() |
692 | .map(|(i, h, t)| (i, h.clone(), t.clone())), |
693 | { |
694 | self.process_end_tag_in_body(Tag { |
695 | kind: EndTag, |
696 | name: subject, |
697 | self_closing: false, |
698 | attrs: vec![], |
699 | }); |
700 | } |
701 | ); |
702 | |
703 | let fmt_elem_stack_index = unwrap_or_return!( |
704 | self.open_elems |
705 | .iter() |
706 | .rposition(|n| self.sink.same_node(n, &fmt_elem)), |
707 | { |
708 | self.sink |
709 | .parse_error(Borrowed("Formatting element not open" )); |
710 | self.active_formatting.remove(fmt_elem_index); |
711 | } |
712 | ); |
713 | |
714 | // 7. |
715 | if !self.in_scope(default_scope, |n| self.sink.same_node(&n, &fmt_elem)) { |
716 | self.sink |
717 | .parse_error(Borrowed("Formatting element not in scope" )); |
718 | return; |
719 | } |
720 | |
721 | // 8. |
722 | if !self.sink.same_node(self.current_node(), &fmt_elem) { |
723 | self.sink |
724 | .parse_error(Borrowed("Formatting element not current node" )); |
725 | } |
726 | |
727 | // 9. |
728 | let (furthest_block_index, furthest_block) = unwrap_or_return!( |
729 | self.open_elems |
730 | .iter() |
731 | .enumerate() |
732 | .skip(fmt_elem_stack_index) |
733 | .filter(|&(_, open_element)| self.elem_in(open_element, special_tag)) |
734 | .next() |
735 | .map(|(i, h)| (i, h.clone())), |
736 | // 10. |
737 | { |
738 | self.open_elems.truncate(fmt_elem_stack_index); |
739 | self.active_formatting.remove(fmt_elem_index); |
740 | } |
741 | ); |
742 | |
743 | // 11. |
744 | let common_ancestor = self.open_elems[fmt_elem_stack_index - 1].clone(); |
745 | |
746 | // 12. |
747 | let mut bookmark = Bookmark::Replace(fmt_elem.clone()); |
748 | |
749 | // 13. |
750 | let mut node; |
751 | let mut node_index = furthest_block_index; |
752 | let mut last_node = furthest_block.clone(); |
753 | |
754 | // 13.1. |
755 | let mut inner_counter = 0; |
756 | loop { |
757 | // 13.2. |
758 | inner_counter += 1; |
759 | |
760 | // 13.3. |
761 | node_index -= 1; |
762 | node = self.open_elems[node_index].clone(); |
763 | |
764 | // 13.4. |
765 | if self.sink.same_node(&node, &fmt_elem) { |
766 | break; |
767 | } |
768 | |
769 | // 13.5. |
770 | if inner_counter > 3 { |
771 | self.position_in_active_formatting(&node) |
772 | .map(|position| self.active_formatting.remove(position)); |
773 | self.open_elems.remove(node_index); |
774 | continue; |
775 | } |
776 | |
777 | let node_formatting_index = unwrap_or_else!( |
778 | self.position_in_active_formatting(&node), |
779 | // 13.6. |
780 | { |
781 | self.open_elems.remove(node_index); |
782 | continue; |
783 | } |
784 | ); |
785 | |
786 | // 13.7. |
787 | let tag = match self.active_formatting[node_formatting_index] { |
788 | Element(ref h, ref t) => { |
789 | assert!(self.sink.same_node(h, &node)); |
790 | t.clone() |
791 | }, |
792 | Marker => panic!("Found marker during adoption agency" ), |
793 | }; |
794 | // FIXME: Is there a way to avoid cloning the attributes twice here (once on their |
795 | // own, once as part of t.clone() above)? |
796 | let new_element = create_element( |
797 | &mut self.sink, |
798 | QualName::new(None, ns!(html), tag.name.clone()), |
799 | tag.attrs.clone(), |
800 | ); |
801 | self.open_elems[node_index] = new_element.clone(); |
802 | self.active_formatting[node_formatting_index] = Element(new_element.clone(), tag); |
803 | node = new_element; |
804 | |
805 | // 13.8. |
806 | if self.sink.same_node(&last_node, &furthest_block) { |
807 | bookmark = Bookmark::InsertAfter(node.clone()); |
808 | } |
809 | |
810 | // 13.9. |
811 | self.sink.remove_from_parent(&last_node); |
812 | self.sink.append(&node, AppendNode(last_node.clone())); |
813 | |
814 | // 13.10. |
815 | last_node = node.clone(); |
816 | |
817 | // 13.11. |
818 | } |
819 | |
820 | // 14. |
821 | self.sink.remove_from_parent(&last_node); |
822 | self.insert_appropriately(AppendNode(last_node.clone()), Some(common_ancestor)); |
823 | |
824 | // 15. |
825 | // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, |
826 | // once as part of t.clone() above)? |
827 | let new_element = create_element( |
828 | &mut self.sink, |
829 | QualName::new(None, ns!(html), fmt_elem_tag.name.clone()), |
830 | fmt_elem_tag.attrs.clone(), |
831 | ); |
832 | let new_entry = Element(new_element.clone(), fmt_elem_tag); |
833 | |
834 | // 16. |
835 | self.sink.reparent_children(&furthest_block, &new_element); |
836 | |
837 | // 17. |
838 | self.sink |
839 | .append(&furthest_block, AppendNode(new_element.clone())); |
840 | |
841 | // 18. |
842 | // FIXME: We could probably get rid of the position_in_active_formatting() calls here |
843 | // if we had a more clever Bookmark representation. |
844 | match bookmark { |
845 | Bookmark::Replace(to_replace) => { |
846 | let index = self |
847 | .position_in_active_formatting(&to_replace) |
848 | .expect("bookmark not found in active formatting elements" ); |
849 | self.active_formatting[index] = new_entry; |
850 | }, |
851 | Bookmark::InsertAfter(previous) => { |
852 | let index = self |
853 | .position_in_active_formatting(&previous) |
854 | .expect("bookmark not found in active formatting elements" ) + |
855 | 1; |
856 | self.active_formatting.insert(index, new_entry); |
857 | let old_index = self |
858 | .position_in_active_formatting(&fmt_elem) |
859 | .expect("formatting element not found in active formatting elements" ); |
860 | self.active_formatting.remove(old_index); |
861 | }, |
862 | } |
863 | |
864 | // 19. |
865 | self.remove_from_stack(&fmt_elem); |
866 | let new_furthest_block_index = self |
867 | .open_elems |
868 | .iter() |
869 | .position(|n| self.sink.same_node(n, &furthest_block)) |
870 | .expect("furthest block missing from open element stack" ); |
871 | self.open_elems |
872 | .insert(new_furthest_block_index + 1, new_element); |
873 | |
874 | // 20. |
875 | } |
876 | } |
877 | |
878 | fn push(&mut self, elem: &Handle) { |
879 | self.open_elems.push(elem.clone()); |
880 | } |
881 | |
882 | fn pop(&mut self) -> Handle { |
883 | let elem = self.open_elems.pop().expect("no current element" ); |
884 | self.sink.pop(&elem); |
885 | elem |
886 | } |
887 | |
888 | fn remove_from_stack(&mut self, elem: &Handle) { |
889 | let sink = &mut self.sink; |
890 | let position = self |
891 | .open_elems |
892 | .iter() |
893 | .rposition(|x| sink.same_node(elem, &x)); |
894 | if let Some(position) = position { |
895 | self.open_elems.remove(position); |
896 | sink.pop(elem); |
897 | } |
898 | } |
899 | |
900 | fn is_marker_or_open(&self, entry: &FormatEntry<Handle>) -> bool { |
901 | match *entry { |
902 | Marker => true, |
903 | Element(ref node, _) => self |
904 | .open_elems |
905 | .iter() |
906 | .rev() |
907 | .any(|n| self.sink.same_node(&n, &node)), |
908 | } |
909 | } |
910 | |
911 | /// Reconstruct the active formatting elements. |
912 | fn reconstruct_formatting(&mut self) { |
913 | { |
914 | let last = unwrap_or_return!(self.active_formatting.last(), ()); |
915 | if self.is_marker_or_open(last) { |
916 | return; |
917 | } |
918 | } |
919 | |
920 | let mut entry_index = self.active_formatting.len() - 1; |
921 | loop { |
922 | if entry_index == 0 { |
923 | break; |
924 | } |
925 | entry_index -= 1; |
926 | if self.is_marker_or_open(&self.active_formatting[entry_index]) { |
927 | entry_index += 1; |
928 | break; |
929 | } |
930 | } |
931 | |
932 | loop { |
933 | let tag = match self.active_formatting[entry_index] { |
934 | Element(_, ref t) => t.clone(), |
935 | Marker => panic!("Found marker during formatting element reconstruction" ), |
936 | }; |
937 | |
938 | // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, |
939 | // once as part of t.clone() above)? |
940 | let new_element = |
941 | self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone()); |
942 | self.active_formatting[entry_index] = Element(new_element, tag); |
943 | if entry_index == self.active_formatting.len() - 1 { |
944 | break; |
945 | } |
946 | entry_index += 1; |
947 | } |
948 | } |
949 | |
950 | /// Get the first element on the stack, which will be the <html> element. |
951 | fn html_elem(&self) -> &Handle { |
952 | &self.open_elems[0] |
953 | } |
954 | |
955 | /// Get the second element on the stack, if it's a HTML body element. |
956 | fn body_elem(&self) -> Option<&Handle> { |
957 | if self.open_elems.len() <= 1 { |
958 | return None; |
959 | } |
960 | |
961 | let node = &self.open_elems[1]; |
962 | if self.html_elem_named(node, local_name!("body" )) { |
963 | Some(node) |
964 | } else { |
965 | None |
966 | } |
967 | } |
968 | |
969 | /// Signal an error depending on the state of the stack of open elements at |
970 | /// the end of the body. |
971 | fn check_body_end(&mut self) { |
972 | declare_tag_set!(body_end_ok = |
973 | "dd" "dt" "li" "optgroup" "option" "p" "rp" "rt" "tbody" "td" "tfoot" "th" |
974 | "thead" "tr" "body" "html" ); |
975 | |
976 | for elem in self.open_elems.iter() { |
977 | let error; |
978 | { |
979 | let name = self.sink.elem_name(elem); |
980 | if body_end_ok(name) { |
981 | continue; |
982 | } |
983 | error = format_if!( |
984 | self.opts.exact_errors, |
985 | "Unexpected open tag at end of body" , |
986 | "Unexpected open tag {:?} at end of body" , |
987 | name |
988 | ); |
989 | } |
990 | self.sink.parse_error(error); |
991 | // FIXME: Do we keep checking after finding one bad tag? |
992 | // The spec suggests not. |
993 | return; |
994 | } |
995 | } |
996 | |
997 | fn in_scope<TagSet, Pred>(&self, scope: TagSet, pred: Pred) -> bool |
998 | where |
999 | TagSet: Fn(ExpandedName) -> bool, |
1000 | Pred: Fn(Handle) -> bool, |
1001 | { |
1002 | for node in self.open_elems.iter().rev() { |
1003 | if pred(node.clone()) { |
1004 | return true; |
1005 | } |
1006 | if scope(self.sink.elem_name(node)) { |
1007 | return false; |
1008 | } |
1009 | } |
1010 | |
1011 | // supposed to be impossible, because <html> is always in scope |
1012 | |
1013 | false |
1014 | } |
1015 | |
1016 | fn elem_in<TagSet>(&self, elem: &Handle, set: TagSet) -> bool |
1017 | where |
1018 | TagSet: Fn(ExpandedName) -> bool, |
1019 | { |
1020 | set(self.sink.elem_name(elem)) |
1021 | } |
1022 | |
1023 | fn html_elem_named(&self, elem: &Handle, name: LocalName) -> bool { |
1024 | let expanded = self.sink.elem_name(elem); |
1025 | *expanded.ns == ns!(html) && *expanded.local == name |
1026 | } |
1027 | |
1028 | fn in_html_elem_named(&self, name: LocalName) -> bool { |
1029 | self.open_elems |
1030 | .iter() |
1031 | .any(|elem| self.html_elem_named(elem, name.clone())) |
1032 | } |
1033 | |
1034 | fn current_node_named(&self, name: LocalName) -> bool { |
1035 | self.html_elem_named(self.current_node(), name) |
1036 | } |
1037 | |
1038 | fn in_scope_named<TagSet>(&self, scope: TagSet, name: LocalName) -> bool |
1039 | where |
1040 | TagSet: Fn(ExpandedName) -> bool, |
1041 | { |
1042 | self.in_scope(scope, |elem| self.html_elem_named(&elem, name.clone())) |
1043 | } |
1044 | |
1045 | //§ closing-elements-that-have-implied-end-tags |
1046 | fn generate_implied_end<TagSet>(&mut self, set: TagSet) |
1047 | where |
1048 | TagSet: Fn(ExpandedName) -> bool, |
1049 | { |
1050 | loop { |
1051 | { |
1052 | let elem = unwrap_or_return!(self.open_elems.last(), ()); |
1053 | let nsname = self.sink.elem_name(elem); |
1054 | if !set(nsname) { |
1055 | return; |
1056 | } |
1057 | } |
1058 | self.pop(); |
1059 | } |
1060 | } |
1061 | |
1062 | fn generate_implied_end_except(&mut self, except: LocalName) { |
1063 | self.generate_implied_end(|p| { |
1064 | if *p.ns == ns!(html) && *p.local == except { |
1065 | false |
1066 | } else { |
1067 | cursory_implied_end(p) |
1068 | } |
1069 | }); |
1070 | } |
1071 | //§ END |
1072 | |
1073 | // Pop elements until the current element is in the set. |
1074 | fn pop_until_current<TagSet>(&mut self, pred: TagSet) |
1075 | where |
1076 | TagSet: Fn(ExpandedName) -> bool, |
1077 | { |
1078 | loop { |
1079 | if self.current_node_in(|x| pred(x)) { |
1080 | break; |
1081 | } |
1082 | self.open_elems.pop(); |
1083 | } |
1084 | } |
1085 | |
1086 | // Pop elements until an element from the set has been popped. Returns the |
1087 | // number of elements popped. |
1088 | fn pop_until<P>(&mut self, pred: P) -> usize |
1089 | where |
1090 | P: Fn(ExpandedName) -> bool, |
1091 | { |
1092 | let mut n = 0; |
1093 | loop { |
1094 | n += 1; |
1095 | match self.open_elems.pop() { |
1096 | None => break, |
1097 | Some(elem) => { |
1098 | if pred(self.sink.elem_name(&elem)) { |
1099 | break; |
1100 | } |
1101 | }, |
1102 | } |
1103 | } |
1104 | n |
1105 | } |
1106 | |
1107 | fn pop_until_named(&mut self, name: LocalName) -> usize { |
1108 | self.pop_until(|p| *p.ns == ns!(html) && *p.local == name) |
1109 | } |
1110 | |
1111 | // Pop elements until one with the specified name has been popped. |
1112 | // Signal an error if it was not the first one. |
1113 | fn expect_to_close(&mut self, name: LocalName) { |
1114 | if self.pop_until_named(name.clone()) != 1 { |
1115 | self.sink.parse_error(format_if!( |
1116 | self.opts.exact_errors, |
1117 | "Unexpected open element" , |
1118 | "Unexpected open element while closing {:?}" , |
1119 | name |
1120 | )); |
1121 | } |
1122 | } |
1123 | |
1124 | fn close_p_element(&mut self) { |
1125 | declare_tag_set!(implied = [cursory_implied_end] - "p" ); |
1126 | self.generate_implied_end(implied); |
1127 | self.expect_to_close(local_name!("p" )); |
1128 | } |
1129 | |
1130 | fn close_p_element_in_button_scope(&mut self) { |
1131 | if self.in_scope_named(button_scope, local_name!("p" )) { |
1132 | self.close_p_element(); |
1133 | } |
1134 | } |
1135 | |
1136 | // Check <input> tags for type=hidden |
1137 | fn is_type_hidden(&self, tag: &Tag) -> bool { |
1138 | match tag |
1139 | .attrs |
1140 | .iter() |
1141 | .find(|&at| at.name.expanded() == expanded_name!("" , "type" )) |
1142 | { |
1143 | None => false, |
1144 | Some(at) => (&*at.value).eq_ignore_ascii_case("hidden" ), |
1145 | } |
1146 | } |
1147 | |
1148 | fn foster_parent_in_body(&mut self, token: Token) -> ProcessResult<Handle> { |
1149 | warn!("foster parenting not implemented" ); |
1150 | self.foster_parenting = true; |
1151 | let res = self.step(InBody, token); |
1152 | // FIXME: what if res is Reprocess? |
1153 | self.foster_parenting = false; |
1154 | res |
1155 | } |
1156 | |
1157 | fn process_chars_in_table(&mut self, token: Token) -> ProcessResult<Handle> { |
1158 | declare_tag_set!(table_outer = "table" "tbody" "tfoot" "thead" "tr" ); |
1159 | if self.current_node_in(table_outer) { |
1160 | assert!(self.pending_table_text.is_empty()); |
1161 | self.orig_mode = Some(self.mode); |
1162 | Reprocess(InTableText, token) |
1163 | } else { |
1164 | self.sink.parse_error(format_if!( |
1165 | self.opts.exact_errors, |
1166 | "Unexpected characters in table" , |
1167 | "Unexpected characters {} in table" , |
1168 | to_escaped_string(&token) |
1169 | )); |
1170 | self.foster_parent_in_body(token) |
1171 | } |
1172 | } |
1173 | |
1174 | // https://html.spec.whatwg.org/multipage/#reset-the-insertion-mode-appropriately |
1175 | fn reset_insertion_mode(&mut self) -> InsertionMode { |
1176 | for (i, mut node) in self.open_elems.iter().enumerate().rev() { |
1177 | let last = i == 0usize; |
1178 | if let (true, Some(ctx)) = (last, self.context_elem.as_ref()) { |
1179 | node = ctx; |
1180 | } |
1181 | let name = match self.sink.elem_name(node) { |
1182 | ExpandedName { |
1183 | ns: &ns!(html), |
1184 | local, |
1185 | } => local, |
1186 | _ => continue, |
1187 | }; |
1188 | match *name { |
1189 | local_name!("select" ) => { |
1190 | for ancestor in self.open_elems[0..i].iter().rev() { |
1191 | if self.html_elem_named(ancestor, local_name!("template" )) { |
1192 | return InSelect; |
1193 | } else if self.html_elem_named(ancestor, local_name!("table" )) { |
1194 | return InSelectInTable; |
1195 | } |
1196 | } |
1197 | return InSelect; |
1198 | }, |
1199 | local_name!("td" ) | local_name!("th" ) => { |
1200 | if !last { |
1201 | return InCell; |
1202 | } |
1203 | }, |
1204 | local_name!("tr" ) => return InRow, |
1205 | local_name!("tbody" ) | local_name!("thead" ) | local_name!("tfoot" ) => { |
1206 | return InTableBody; |
1207 | }, |
1208 | local_name!("caption" ) => return InCaption, |
1209 | local_name!("colgroup" ) => return InColumnGroup, |
1210 | local_name!("table" ) => return InTable, |
1211 | local_name!("template" ) => return *self.template_modes.last().unwrap(), |
1212 | local_name!("head" ) => { |
1213 | if !last { |
1214 | return InHead; |
1215 | } |
1216 | }, |
1217 | local_name!("body" ) => return InBody, |
1218 | local_name!("frameset" ) => return InFrameset, |
1219 | local_name!("html" ) => match self.head_elem { |
1220 | None => return BeforeHead, |
1221 | Some(_) => return AfterHead, |
1222 | }, |
1223 | |
1224 | _ => (), |
1225 | } |
1226 | } |
1227 | InBody |
1228 | } |
1229 | |
1230 | fn close_the_cell(&mut self) { |
1231 | self.generate_implied_end(cursory_implied_end); |
1232 | if self.pop_until(td_th) != 1 { |
1233 | self.sink |
1234 | .parse_error(Borrowed("expected to close <td> or <th> with cell" )); |
1235 | } |
1236 | self.clear_active_formatting_to_marker(); |
1237 | } |
1238 | |
1239 | fn append_text(&mut self, text: StrTendril) -> ProcessResult<Handle> { |
1240 | self.insert_appropriately(AppendText(text), None); |
1241 | Done |
1242 | } |
1243 | |
1244 | fn append_comment(&mut self, text: StrTendril) -> ProcessResult<Handle> { |
1245 | let comment = self.sink.create_comment(text); |
1246 | self.insert_appropriately(AppendNode(comment), None); |
1247 | Done |
1248 | } |
1249 | |
1250 | fn append_comment_to_doc(&mut self, text: StrTendril) -> ProcessResult<Handle> { |
1251 | let comment = self.sink.create_comment(text); |
1252 | self.sink.append(&self.doc_handle, AppendNode(comment)); |
1253 | Done |
1254 | } |
1255 | |
1256 | fn append_comment_to_html(&mut self, text: StrTendril) -> ProcessResult<Handle> { |
1257 | let target = html_elem(&self.open_elems); |
1258 | let comment = self.sink.create_comment(text); |
1259 | self.sink.append(target, AppendNode(comment)); |
1260 | Done |
1261 | } |
1262 | |
1263 | //§ creating-and-inserting-nodes |
1264 | fn create_root(&mut self, attrs: Vec<Attribute>) { |
1265 | let elem = create_element( |
1266 | &mut self.sink, |
1267 | QualName::new(None, ns!(html), local_name!("html" )), |
1268 | attrs, |
1269 | ); |
1270 | self.push(&elem); |
1271 | self.sink.append(&self.doc_handle, AppendNode(elem)); |
1272 | // FIXME: application cache selection algorithm |
1273 | } |
1274 | |
1275 | // https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token |
1276 | fn insert_element( |
1277 | &mut self, |
1278 | push: PushFlag, |
1279 | ns: Namespace, |
1280 | name: LocalName, |
1281 | attrs: Vec<Attribute>, |
1282 | ) -> Handle { |
1283 | declare_tag_set!(form_associatable = |
1284 | "button" "fieldset" "input" "object" |
1285 | "output" "select" "textarea" "img" ); |
1286 | |
1287 | declare_tag_set!(listed = [form_associatable] - "img" ); |
1288 | |
1289 | // Step 7. |
1290 | let qname = QualName::new(None, ns, name); |
1291 | let elem = create_element(&mut self.sink, qname.clone(), attrs.clone()); |
1292 | |
1293 | let insertion_point = self.appropriate_place_for_insertion(None); |
1294 | let (node1, node2) = match insertion_point { |
1295 | LastChild(ref p) | BeforeSibling(ref p) => (p.clone(), None), |
1296 | TableFosterParenting { |
1297 | ref element, |
1298 | ref prev_element, |
1299 | } => (element.clone(), Some(prev_element.clone())), |
1300 | }; |
1301 | |
1302 | // Step 12. |
1303 | if form_associatable(qname.expanded()) && |
1304 | self.form_elem.is_some() && |
1305 | !self.in_html_elem_named(local_name!("template" )) && |
1306 | !(listed(qname.expanded()) && |
1307 | attrs |
1308 | .iter() |
1309 | .any(|a| a.name.expanded() == expanded_name!("" , "form" ))) |
1310 | { |
1311 | let form = self.form_elem.as_ref().unwrap().clone(); |
1312 | let node2 = match node2 { |
1313 | Some(ref n) => Some(n), |
1314 | None => None, |
1315 | }; |
1316 | self.sink.associate_with_form(&elem, &form, (&node1, node2)); |
1317 | } |
1318 | |
1319 | self.insert_at(insertion_point, AppendNode(elem.clone())); |
1320 | |
1321 | match push { |
1322 | Push => self.push(&elem), |
1323 | NoPush => (), |
1324 | } |
1325 | // FIXME: Remove from the stack if we can't append? |
1326 | elem |
1327 | } |
1328 | |
1329 | fn insert_element_for(&mut self, tag: Tag) -> Handle { |
1330 | self.insert_element(Push, ns!(html), tag.name, tag.attrs) |
1331 | } |
1332 | |
1333 | fn insert_and_pop_element_for(&mut self, tag: Tag) -> Handle { |
1334 | self.insert_element(NoPush, ns!(html), tag.name, tag.attrs) |
1335 | } |
1336 | |
1337 | fn insert_phantom(&mut self, name: LocalName) -> Handle { |
1338 | self.insert_element(Push, ns!(html), name, vec![]) |
1339 | } |
1340 | //§ END |
1341 | |
1342 | fn create_formatting_element_for(&mut self, tag: Tag) -> Handle { |
1343 | // FIXME: This really wants unit tests. |
1344 | let mut first_match = None; |
1345 | let mut matches = 0usize; |
1346 | for (i, _, old_tag) in self.active_formatting_end_to_marker() { |
1347 | if tag.equiv_modulo_attr_order(old_tag) { |
1348 | first_match = Some(i); |
1349 | matches += 1; |
1350 | } |
1351 | } |
1352 | |
1353 | if matches >= 3 { |
1354 | self.active_formatting |
1355 | .remove(first_match.expect("matches with no index" )); |
1356 | } |
1357 | |
1358 | let elem = self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone()); |
1359 | self.active_formatting.push(Element(elem.clone(), tag)); |
1360 | elem |
1361 | } |
1362 | |
1363 | fn clear_active_formatting_to_marker(&mut self) { |
1364 | loop { |
1365 | match self.active_formatting.pop() { |
1366 | None | Some(Marker) => break, |
1367 | _ => (), |
1368 | } |
1369 | } |
1370 | } |
1371 | |
1372 | fn process_end_tag_in_body(&mut self, tag: Tag) { |
1373 | // Look back for a matching open element. |
1374 | let mut match_idx = None; |
1375 | for (i, elem) in self.open_elems.iter().enumerate().rev() { |
1376 | if self.html_elem_named(elem, tag.name.clone()) { |
1377 | match_idx = Some(i); |
1378 | break; |
1379 | } |
1380 | |
1381 | if self.elem_in(elem, special_tag) { |
1382 | self.sink |
1383 | .parse_error(Borrowed("Found special tag while closing generic tag" )); |
1384 | return; |
1385 | } |
1386 | } |
1387 | |
1388 | // Can't use unwrap_or_return!() due to rust-lang/rust#16617. |
1389 | let match_idx = match match_idx { |
1390 | None => { |
1391 | // I believe this is impossible, because the root |
1392 | // <html> element is in special_tag. |
1393 | self.unexpected(&tag); |
1394 | return; |
1395 | }, |
1396 | Some(x) => x, |
1397 | }; |
1398 | |
1399 | self.generate_implied_end_except(tag.name.clone()); |
1400 | |
1401 | if match_idx != self.open_elems.len() - 1 { |
1402 | // mis-nested tags |
1403 | self.unexpected(&tag); |
1404 | } |
1405 | self.open_elems.truncate(match_idx); |
1406 | } |
1407 | |
1408 | fn handle_misnested_a_tags(&mut self, tag: &Tag) { |
1409 | let node = unwrap_or_return!( |
1410 | self.active_formatting_end_to_marker() |
1411 | .filter(|&(_, n, _)| self.html_elem_named(n, local_name!("a" ))) |
1412 | .next() |
1413 | .map(|(_, n, _)| n.clone()), |
1414 | () |
1415 | ); |
1416 | |
1417 | self.unexpected(tag); |
1418 | self.adoption_agency(local_name!("a" )); |
1419 | self.position_in_active_formatting(&node) |
1420 | .map(|index| self.active_formatting.remove(index)); |
1421 | self.remove_from_stack(&node); |
1422 | } |
1423 | |
1424 | //§ tree-construction |
1425 | fn is_foreign(&mut self, token: &Token) -> bool { |
1426 | if let EOFToken = *token { |
1427 | return false; |
1428 | } |
1429 | |
1430 | if self.open_elems.is_empty() { |
1431 | return false; |
1432 | } |
1433 | |
1434 | let name = self.sink.elem_name(self.adjusted_current_node()); |
1435 | if let ns!(html) = *name.ns { |
1436 | return false; |
1437 | } |
1438 | |
1439 | if mathml_text_integration_point(name) { |
1440 | match *token { |
1441 | CharacterTokens(..) | NullCharacterToken => return false, |
1442 | TagToken(Tag { |
1443 | kind: StartTag, |
1444 | ref name, |
1445 | .. |
1446 | }) if !matches!(*name, local_name!("mglyph" ) | local_name!("malignmark" )) => { |
1447 | return false; |
1448 | }, |
1449 | _ => (), |
1450 | } |
1451 | } |
1452 | |
1453 | if svg_html_integration_point(name) { |
1454 | match *token { |
1455 | CharacterTokens(..) | NullCharacterToken => return false, |
1456 | TagToken(Tag { kind: StartTag, .. }) => return false, |
1457 | _ => (), |
1458 | } |
1459 | } |
1460 | |
1461 | if let expanded_name!(mathml "annotation-xml" ) = name { |
1462 | match *token { |
1463 | TagToken(Tag { |
1464 | kind: StartTag, |
1465 | name: local_name!("svg" ), |
1466 | .. |
1467 | }) => return false, |
1468 | CharacterTokens(..) | NullCharacterToken | TagToken(Tag { kind: StartTag, .. }) => { |
1469 | return !self |
1470 | .sink |
1471 | .is_mathml_annotation_xml_integration_point(self.adjusted_current_node()); |
1472 | }, |
1473 | _ => {}, |
1474 | }; |
1475 | } |
1476 | |
1477 | true |
1478 | } |
1479 | //§ END |
1480 | |
1481 | fn enter_foreign(&mut self, mut tag: Tag, ns: Namespace) -> ProcessResult<Handle> { |
1482 | match ns { |
1483 | ns!(mathml) => self.adjust_mathml_attributes(&mut tag), |
1484 | ns!(svg) => self.adjust_svg_attributes(&mut tag), |
1485 | _ => (), |
1486 | } |
1487 | self.adjust_foreign_attributes(&mut tag); |
1488 | |
1489 | if tag.self_closing { |
1490 | self.insert_element(NoPush, ns, tag.name, tag.attrs); |
1491 | DoneAckSelfClosing |
1492 | } else { |
1493 | self.insert_element(Push, ns, tag.name, tag.attrs); |
1494 | Done |
1495 | } |
1496 | } |
1497 | |
1498 | fn adjust_svg_tag_name(&mut self, tag: &mut Tag) { |
1499 | let Tag { ref mut name, .. } = *tag; |
1500 | match *name { |
1501 | local_name!("altglyph" ) => *name = local_name!("altGlyph" ), |
1502 | local_name!("altglyphdef" ) => *name = local_name!("altGlyphDef" ), |
1503 | local_name!("altglyphitem" ) => *name = local_name!("altGlyphItem" ), |
1504 | local_name!("animatecolor" ) => *name = local_name!("animateColor" ), |
1505 | local_name!("animatemotion" ) => *name = local_name!("animateMotion" ), |
1506 | local_name!("animatetransform" ) => *name = local_name!("animateTransform" ), |
1507 | local_name!("clippath" ) => *name = local_name!("clipPath" ), |
1508 | local_name!("feblend" ) => *name = local_name!("feBlend" ), |
1509 | local_name!("fecolormatrix" ) => *name = local_name!("feColorMatrix" ), |
1510 | local_name!("fecomponenttransfer" ) => *name = local_name!("feComponentTransfer" ), |
1511 | local_name!("fecomposite" ) => *name = local_name!("feComposite" ), |
1512 | local_name!("feconvolvematrix" ) => *name = local_name!("feConvolveMatrix" ), |
1513 | local_name!("fediffuselighting" ) => *name = local_name!("feDiffuseLighting" ), |
1514 | local_name!("fedisplacementmap" ) => *name = local_name!("feDisplacementMap" ), |
1515 | local_name!("fedistantlight" ) => *name = local_name!("feDistantLight" ), |
1516 | local_name!("fedropshadow" ) => *name = local_name!("feDropShadow" ), |
1517 | local_name!("feflood" ) => *name = local_name!("feFlood" ), |
1518 | local_name!("fefunca" ) => *name = local_name!("feFuncA" ), |
1519 | local_name!("fefuncb" ) => *name = local_name!("feFuncB" ), |
1520 | local_name!("fefuncg" ) => *name = local_name!("feFuncG" ), |
1521 | local_name!("fefuncr" ) => *name = local_name!("feFuncR" ), |
1522 | local_name!("fegaussianblur" ) => *name = local_name!("feGaussianBlur" ), |
1523 | local_name!("feimage" ) => *name = local_name!("feImage" ), |
1524 | local_name!("femerge" ) => *name = local_name!("feMerge" ), |
1525 | local_name!("femergenode" ) => *name = local_name!("feMergeNode" ), |
1526 | local_name!("femorphology" ) => *name = local_name!("feMorphology" ), |
1527 | local_name!("feoffset" ) => *name = local_name!("feOffset" ), |
1528 | local_name!("fepointlight" ) => *name = local_name!("fePointLight" ), |
1529 | local_name!("fespecularlighting" ) => *name = local_name!("feSpecularLighting" ), |
1530 | local_name!("fespotlight" ) => *name = local_name!("feSpotLight" ), |
1531 | local_name!("fetile" ) => *name = local_name!("feTile" ), |
1532 | local_name!("feturbulence" ) => *name = local_name!("feTurbulence" ), |
1533 | local_name!("foreignobject" ) => *name = local_name!("foreignObject" ), |
1534 | local_name!("glyphref" ) => *name = local_name!("glyphRef" ), |
1535 | local_name!("lineargradient" ) => *name = local_name!("linearGradient" ), |
1536 | local_name!("radialgradient" ) => *name = local_name!("radialGradient" ), |
1537 | local_name!("textpath" ) => *name = local_name!("textPath" ), |
1538 | _ => (), |
1539 | } |
1540 | } |
1541 | |
1542 | fn adjust_attributes<F>(&mut self, tag: &mut Tag, mut map: F) |
1543 | where |
1544 | F: FnMut(LocalName) -> Option<QualName>, |
1545 | { |
1546 | for &mut Attribute { ref mut name, .. } in &mut tag.attrs { |
1547 | if let Some(replacement) = map(name.local.clone()) { |
1548 | *name = replacement; |
1549 | } |
1550 | } |
1551 | } |
1552 | |
1553 | fn adjust_svg_attributes(&mut self, tag: &mut Tag) { |
1554 | self.adjust_attributes(tag, |k| match k { |
1555 | local_name!("attributename" ) => Some(qualname!("" , "attributeName" )), |
1556 | local_name!("attributetype" ) => Some(qualname!("" , "attributeType" )), |
1557 | local_name!("basefrequency" ) => Some(qualname!("" , "baseFrequency" )), |
1558 | local_name!("baseprofile" ) => Some(qualname!("" , "baseProfile" )), |
1559 | local_name!("calcmode" ) => Some(qualname!("" , "calcMode" )), |
1560 | local_name!("clippathunits" ) => Some(qualname!("" , "clipPathUnits" )), |
1561 | local_name!("diffuseconstant" ) => Some(qualname!("" , "diffuseConstant" )), |
1562 | local_name!("edgemode" ) => Some(qualname!("" , "edgeMode" )), |
1563 | local_name!("filterunits" ) => Some(qualname!("" , "filterUnits" )), |
1564 | local_name!("glyphref" ) => Some(qualname!("" , "glyphRef" )), |
1565 | local_name!("gradienttransform" ) => Some(qualname!("" , "gradientTransform" )), |
1566 | local_name!("gradientunits" ) => Some(qualname!("" , "gradientUnits" )), |
1567 | local_name!("kernelmatrix" ) => Some(qualname!("" , "kernelMatrix" )), |
1568 | local_name!("kernelunitlength" ) => Some(qualname!("" , "kernelUnitLength" )), |
1569 | local_name!("keypoints" ) => Some(qualname!("" , "keyPoints" )), |
1570 | local_name!("keysplines" ) => Some(qualname!("" , "keySplines" )), |
1571 | local_name!("keytimes" ) => Some(qualname!("" , "keyTimes" )), |
1572 | local_name!("lengthadjust" ) => Some(qualname!("" , "lengthAdjust" )), |
1573 | local_name!("limitingconeangle" ) => Some(qualname!("" , "limitingConeAngle" )), |
1574 | local_name!("markerheight" ) => Some(qualname!("" , "markerHeight" )), |
1575 | local_name!("markerunits" ) => Some(qualname!("" , "markerUnits" )), |
1576 | local_name!("markerwidth" ) => Some(qualname!("" , "markerWidth" )), |
1577 | local_name!("maskcontentunits" ) => Some(qualname!("" , "maskContentUnits" )), |
1578 | local_name!("maskunits" ) => Some(qualname!("" , "maskUnits" )), |
1579 | local_name!("numoctaves" ) => Some(qualname!("" , "numOctaves" )), |
1580 | local_name!("pathlength" ) => Some(qualname!("" , "pathLength" )), |
1581 | local_name!("patterncontentunits" ) => Some(qualname!("" , "patternContentUnits" )), |
1582 | local_name!("patterntransform" ) => Some(qualname!("" , "patternTransform" )), |
1583 | local_name!("patternunits" ) => Some(qualname!("" , "patternUnits" )), |
1584 | local_name!("pointsatx" ) => Some(qualname!("" , "pointsAtX" )), |
1585 | local_name!("pointsaty" ) => Some(qualname!("" , "pointsAtY" )), |
1586 | local_name!("pointsatz" ) => Some(qualname!("" , "pointsAtZ" )), |
1587 | local_name!("preservealpha" ) => Some(qualname!("" , "preserveAlpha" )), |
1588 | local_name!("preserveaspectratio" ) => Some(qualname!("" , "preserveAspectRatio" )), |
1589 | local_name!("primitiveunits" ) => Some(qualname!("" , "primitiveUnits" )), |
1590 | local_name!("refx" ) => Some(qualname!("" , "refX" )), |
1591 | local_name!("refy" ) => Some(qualname!("" , "refY" )), |
1592 | local_name!("repeatcount" ) => Some(qualname!("" , "repeatCount" )), |
1593 | local_name!("repeatdur" ) => Some(qualname!("" , "repeatDur" )), |
1594 | local_name!("requiredextensions" ) => Some(qualname!("" , "requiredExtensions" )), |
1595 | local_name!("requiredfeatures" ) => Some(qualname!("" , "requiredFeatures" )), |
1596 | local_name!("specularconstant" ) => Some(qualname!("" , "specularConstant" )), |
1597 | local_name!("specularexponent" ) => Some(qualname!("" , "specularExponent" )), |
1598 | local_name!("spreadmethod" ) => Some(qualname!("" , "spreadMethod" )), |
1599 | local_name!("startoffset" ) => Some(qualname!("" , "startOffset" )), |
1600 | local_name!("stddeviation" ) => Some(qualname!("" , "stdDeviation" )), |
1601 | local_name!("stitchtiles" ) => Some(qualname!("" , "stitchTiles" )), |
1602 | local_name!("surfacescale" ) => Some(qualname!("" , "surfaceScale" )), |
1603 | local_name!("systemlanguage" ) => Some(qualname!("" , "systemLanguage" )), |
1604 | local_name!("tablevalues" ) => Some(qualname!("" , "tableValues" )), |
1605 | local_name!("targetx" ) => Some(qualname!("" , "targetX" )), |
1606 | local_name!("targety" ) => Some(qualname!("" , "targetY" )), |
1607 | local_name!("textlength" ) => Some(qualname!("" , "textLength" )), |
1608 | local_name!("viewbox" ) => Some(qualname!("" , "viewBox" )), |
1609 | local_name!("viewtarget" ) => Some(qualname!("" , "viewTarget" )), |
1610 | local_name!("xchannelselector" ) => Some(qualname!("" , "xChannelSelector" )), |
1611 | local_name!("ychannelselector" ) => Some(qualname!("" , "yChannelSelector" )), |
1612 | local_name!("zoomandpan" ) => Some(qualname!("" , "zoomAndPan" )), |
1613 | _ => None, |
1614 | }); |
1615 | } |
1616 | |
1617 | fn adjust_mathml_attributes(&mut self, tag: &mut Tag) { |
1618 | self.adjust_attributes(tag, |k| match k { |
1619 | local_name!("definitionurl" ) => Some(qualname!("" , "definitionURL" )), |
1620 | _ => None, |
1621 | }); |
1622 | } |
1623 | |
1624 | fn adjust_foreign_attributes(&mut self, tag: &mut Tag) { |
1625 | self.adjust_attributes(tag, |k| match k { |
1626 | local_name!("xlink:actuate" ) => Some(qualname!("xlink" xlink "actuate" )), |
1627 | local_name!("xlink:arcrole" ) => Some(qualname!("xlink" xlink "arcrole" )), |
1628 | local_name!("xlink:href" ) => Some(qualname!("xlink" xlink "href" )), |
1629 | local_name!("xlink:role" ) => Some(qualname!("xlink" xlink "role" )), |
1630 | local_name!("xlink:show" ) => Some(qualname!("xlink" xlink "show" )), |
1631 | local_name!("xlink:title" ) => Some(qualname!("xlink" xlink "title" )), |
1632 | local_name!("xlink:type" ) => Some(qualname!("xlink" xlink "type" )), |
1633 | local_name!("xml:base" ) => Some(qualname!("xml" xml "base" )), |
1634 | local_name!("xml:lang" ) => Some(qualname!("xml" xml "lang" )), |
1635 | local_name!("xml:space" ) => Some(qualname!("xml" xml "space" )), |
1636 | local_name!("xmlns" ) => Some(qualname!("" xmlns "xmlns" )), |
1637 | local_name!("xmlns:xlink" ) => Some(qualname!("xmlns" xmlns "xlink" )), |
1638 | _ => None, |
1639 | }); |
1640 | } |
1641 | |
1642 | fn foreign_start_tag(&mut self, mut tag: Tag) -> ProcessResult<Handle> { |
1643 | let current_ns = self.sink.elem_name(self.adjusted_current_node()).ns.clone(); |
1644 | match current_ns { |
1645 | ns!(mathml) => self.adjust_mathml_attributes(&mut tag), |
1646 | ns!(svg) => { |
1647 | self.adjust_svg_tag_name(&mut tag); |
1648 | self.adjust_svg_attributes(&mut tag); |
1649 | }, |
1650 | _ => (), |
1651 | } |
1652 | self.adjust_foreign_attributes(&mut tag); |
1653 | if tag.self_closing { |
1654 | // FIXME(#118): <script /> in SVG |
1655 | self.insert_element(NoPush, current_ns, tag.name, tag.attrs); |
1656 | DoneAckSelfClosing |
1657 | } else { |
1658 | self.insert_element(Push, current_ns, tag.name, tag.attrs); |
1659 | Done |
1660 | } |
1661 | } |
1662 | |
1663 | fn unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult<Handle> { |
1664 | self.unexpected(&tag); |
1665 | if self.is_fragment() { |
1666 | self.foreign_start_tag(tag) |
1667 | } else { |
1668 | self.pop(); |
1669 | while !self.current_node_in(|n| { |
1670 | *n.ns == ns!(html) || |
1671 | mathml_text_integration_point(n) || |
1672 | svg_html_integration_point(n) |
1673 | }) { |
1674 | self.pop(); |
1675 | } |
1676 | ReprocessForeign(TagToken(tag)) |
1677 | } |
1678 | } |
1679 | } |
1680 | |