1 | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; |
11 | use crate::tendril::StrTendril; |
12 | use crate::tokenizer::Doctype; |
13 | |
14 | // These should all be lowercase, for ASCII-case-insensitive matching. |
/// Public-identifier prefixes that select full quirks mode.
///
/// A DOCTYPE whose ASCII-lowercased public identifier starts with any of
/// these entries is treated as quirky by `doctype_error_and_quirks`.
/// Entries must stay lowercase because the caller lowercases the identifier
/// before comparing.
static QUIRKY_PUBLIC_PREFIXES: &[&str] = &[
    "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
    "-//as//dtd html 3.0 aswedit + extensions//",
    "-//ietf//dtd html 2.0 level 1//",
    "-//ietf//dtd html 2.0 level 2//",
    "-//ietf//dtd html 2.0 strict level 1//",
    "-//ietf//dtd html 2.0 strict level 2//",
    "-//ietf//dtd html 2.0 strict//",
    "-//ietf//dtd html 2.0//",
    "-//ietf//dtd html 2.1e//",
    "-//ietf//dtd html 3.0//",
    "-//ietf//dtd html 3.2 final//",
    "-//ietf//dtd html 3.2//",
    "-//ietf//dtd html 3//",
    "-//ietf//dtd html level 0//",
    "-//ietf//dtd html level 1//",
    "-//ietf//dtd html level 2//",
    "-//ietf//dtd html level 3//",
    "-//ietf//dtd html strict level 0//",
    "-//ietf//dtd html strict level 1//",
    "-//ietf//dtd html strict level 2//",
    "-//ietf//dtd html strict level 3//",
    "-//ietf//dtd html strict//",
    "-//ietf//dtd html//",
    "-//metrius//dtd metrius presentational//",
    "-//microsoft//dtd internet explorer 2.0 html strict//",
    "-//microsoft//dtd internet explorer 2.0 html//",
    "-//microsoft//dtd internet explorer 2.0 tables//",
    "-//microsoft//dtd internet explorer 3.0 html strict//",
    "-//microsoft//dtd internet explorer 3.0 html//",
    "-//microsoft//dtd internet explorer 3.0 tables//",
    "-//netscape comm. corp.//dtd html//",
    "-//netscape comm. corp.//dtd strict html//",
    "-//o'reilly and associates//dtd html 2.0//",
    "-//o'reilly and associates//dtd html extended 1.0//",
    "-//o'reilly and associates//dtd html extended relaxed 1.0//",
    "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
    "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
    "-//spyglass//dtd html 2.0 extended//",
    "-//sq//dtd html 2.0 hotmetal + extensions//",
    "-//sun microsystems corp.//dtd hotjava html//",
    "-//sun microsystems corp.//dtd hotjava strict html//",
    "-//w3c//dtd html 3 1995-03-24//",
    "-//w3c//dtd html 3.2 draft//",
    "-//w3c//dtd html 3.2 final//",
    "-//w3c//dtd html 3.2//",
    "-//w3c//dtd html 3.2s draft//",
    "-//w3c//dtd html 4.0 frameset//",
    "-//w3c//dtd html 4.0 transitional//",
    "-//w3c//dtd html experimental 19960712//",
    "-//w3c//dtd html experimental 970421//",
    "-//w3c//dtd w3 html//",
    "-//w3o//dtd w3 html 3.0//",
    "-//webtechs//dtd mozilla html 2.0//",
    "-//webtechs//dtd mozilla html//",
];
71 | |
/// Public identifiers that select full quirks mode on an exact match.
///
/// Compared against the ASCII-lowercased public identifier, so every entry
/// must be lowercase.
static QUIRKY_PUBLIC_MATCHES: &[&str] = &[
    "-//w3o//dtd w3 html strict 3.0//en//",
    "-/w3c/dtd html 4.0 transitional/en",
    "html",
];
77 | |
/// System identifiers that select full quirks mode on an exact match.
///
/// Compared against the ASCII-lowercased system identifier, so every entry
/// must be lowercase.
static QUIRKY_SYSTEM_MATCHES: &[&str] =
    &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"];
80 | |
/// Public-identifier prefixes that select limited-quirks mode.
///
/// Compared against the ASCII-lowercased public identifier, so every entry
/// must be lowercase.
static LIMITED_QUIRKY_PUBLIC_PREFIXES: &[&str] = &[
    "-//w3c//dtd xhtml 1.0 frameset//",
    "-//w3c//dtd xhtml 1.0 transitional//",
];
85 | |
/// HTML 4.01 public-identifier prefixes whose quirks mode depends on the
/// system identifier.
///
/// `doctype_error_and_quirks` selects full quirks mode for these when the
/// system identifier is missing and limited-quirks mode when it is present.
/// Compared against the ASCII-lowercased public identifier, so every entry
/// must be lowercase.
static HTML4_PUBLIC_PREFIXES: &[&str] = &[
    "-//w3c//dtd html 4.01 frameset//",
    "-//w3c//dtd html 4.01 transitional//",
];
90 | |
91 | pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool, QuirksMode) { |
92 | fn opt_string_as_slice<'t>(x: &'t Option<String>) -> Option<&'t str> { |
93 | x.as_ref().map(|y| &y[..]) |
94 | } |
95 | |
96 | fn opt_tendril_as_slice<'t>(x: &'t Option<StrTendril>) -> Option<&'t str> { |
97 | match *x { |
98 | Some(ref t) => Some(t), |
99 | None => None, |
100 | } |
101 | } |
102 | |
103 | fn opt_to_ascii_lower(x: Option<&str>) -> Option<String> { |
104 | x.map(|y| y.to_ascii_lowercase()) |
105 | } |
106 | |
107 | let name = opt_tendril_as_slice(&doctype.name); |
108 | let public = opt_tendril_as_slice(&doctype.public_id); |
109 | let system = opt_tendril_as_slice(&doctype.system_id); |
110 | |
111 | let err = match (name, public, system) { |
112 | (Some("html" ), None, None) | |
113 | (Some("html" ), None, Some("about:legacy-compat" )) | |
114 | (Some("html" ), Some("-//W3C//DTD HTML 4.0//EN" ), None) | |
115 | ( |
116 | Some("html" ), |
117 | Some("-//W3C//DTD HTML 4.0//EN" ), |
118 | Some("http://www.w3.org/TR/REC-html40/strict.dtd" ), |
119 | ) | |
120 | (Some("html" ), Some("-//W3C//DTD HTML 4.01//EN" ), None) | |
121 | ( |
122 | Some("html" ), |
123 | Some("-//W3C//DTD HTML 4.01//EN" ), |
124 | Some("http://www.w3.org/TR/html4/strict.dtd" ), |
125 | ) | |
126 | ( |
127 | Some("html" ), |
128 | Some("-//W3C//DTD XHTML 1.0 Strict//EN" ), |
129 | Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" ), |
130 | ) | |
131 | ( |
132 | Some("html" ), |
133 | Some("-//W3C//DTD XHTML 1.1//EN" ), |
134 | Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" ), |
135 | ) => false, |
136 | |
137 | _ => true, |
138 | }; |
139 | |
140 | // FIXME: We could do something asymptotically faster here. |
141 | // But there aren't many strings, and this happens at most once per parse. |
142 | fn contains_pfx(haystack: &[&str], needle: &str) -> bool { |
143 | haystack.iter().any(|&x| needle.starts_with(x)) |
144 | } |
145 | |
146 | // Quirks-mode matches are case-insensitive. |
147 | let public = opt_to_ascii_lower(public); |
148 | let system = opt_to_ascii_lower(system); |
149 | |
150 | let quirk = match (opt_string_as_slice(&public), opt_string_as_slice(&system)) { |
151 | _ if doctype.force_quirks => Quirks, |
152 | _ if name != Some("html" ) => Quirks, |
153 | |
154 | _ if iframe_srcdoc => NoQuirks, |
155 | |
156 | (Some(ref p), _) if QUIRKY_PUBLIC_MATCHES.contains(p) => Quirks, |
157 | (_, Some(ref s)) if QUIRKY_SYSTEM_MATCHES.contains(s) => Quirks, |
158 | |
159 | (Some(p), _) if contains_pfx(QUIRKY_PUBLIC_PREFIXES, p) => Quirks, |
160 | (Some(p), _) if contains_pfx(LIMITED_QUIRKY_PUBLIC_PREFIXES, p) => LimitedQuirks, |
161 | |
162 | (Some(p), s) if contains_pfx(HTML4_PUBLIC_PREFIXES, p) => match s { |
163 | None => Quirks, |
164 | Some(_) => LimitedQuirks, |
165 | }, |
166 | |
167 | _ => NoQuirks, |
168 | }; |
169 | |
170 | (err, quirk) |
171 | } |
172 | |