1extern crate proc_macro;
2
3use quote::quote;
4use syn::{braced, Token};
5
6use std::collections::HashSet;
7use syn::ext::IdentExt;
8use syn::parse::{Parse, ParseStream, Result};
9
10/// Implements the `match_token!()` macro for use by the HTML tree builder
11/// in `src/tree_builder/rules.rs`.
12///
13/// ## Example
14///
15/// ```rust,ignore
16/// match_token!(token {
17/// CommentToken(text) => 1,
18/// tag @ <base> <link> <meta> => 2,
19/// </head> => 3,
20/// </body> </html> </br> => else,
21/// tag @ </_> => 4,
22/// token => 5,
23/// })
24/// ```
25///
26/// ## Syntax
27/// Because of the simplistic parser, the macro invocation must
28/// start with exactly `match_token!(token {` (with whitespace as specified)
29/// and end with exactly `})`.
30/// The left-hand side of each match arm is an optional `name @` binding, followed by
31/// - an ordinary Rust pattern that starts with an identifier or an underscore, or
32/// - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>"
33/// to match an open or close tag respectively, or
34/// - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags
35/// respectively.
36///
37/// The right-hand side is either an expression or the keyword `else`.
38/// Note that this syntax does not support guards or pattern alternation like
39/// `Foo | Bar`. This is not a fundamental limitation; it's done for implementation
40/// simplicity.
41/// ## Semantics
42/// Ordinary Rust patterns match as usual. If present, the `name @` binding has
43/// the usual meaning.
44/// A sequence of named tags matches any of those tags. A single sequence can
45/// contain both open and close tags. If present, the `name @` binding binds (by
46/// move) the `Tag` struct, not the outer `Token`. That is, a match arm like
47/// ```rust,ignore
48/// tag @ <html> <head> => ...
49/// ```
50/// expands to something like
51/// ```rust,ignore
52/// TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag })
53/// | TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ...
54/// ```
55/// A wildcard tag matches any tag of the appropriate kind, *unless* it was
56/// previously matched with an `else` right-hand side (more on this below).
57/// The expansion of this macro reorders code somewhat, to satisfy various
58/// restrictions arising from moves. However it provides the semantics of in-order
59/// matching, by enforcing the following restrictions on its input:
60/// - The last pattern must be a variable or the wildcard "_". In other words
61/// it must match everything.
62/// - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear
63/// after wildcard tag patterns.
64/// - No tag name may appear more than once.
65/// - A wildcard tag pattern may not occur in the same arm as any other tag.
66/// "<_> <html> => ..." and "<_> </_> => ..." are both forbidden.
67/// - The right-hand side "else" may only appear with specific-tag patterns.
68/// It means that these specific tags should be handled by the last,
69/// catch-all case arm, rather than by any wildcard tag arm. This situation
70/// is common in the HTML5 syntax.
71#[proc_macro]
72pub fn match_token(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
73 let input: TokenStream = proc_macro2::TokenStream::from(input);
74
75 let match_token: MatchToken = syn::parse2::<MatchToken>(input).expect(msg:"Parsing match_token! input failed");
76 let output: TokenStream = expand_match_token_macro(match_token);
77
78 proc_macro::TokenStream::from(output)
79}
80
81struct MatchToken {
82 ident: syn::Ident,
83 arms: Vec<MatchTokenArm>,
84}
85
86struct MatchTokenArm {
87 binding: Option<syn::Ident>,
88 lhs: Lhs,
89 rhs: Rhs,
90}
91
92enum Lhs {
93 Tags(Vec<Tag>),
94 Pattern(syn::Pat),
95}
96
97enum Rhs {
98 Expression(syn::Expr),
99 Else,
100}
101
/// Whether a tag pattern is written as an open tag or a close tag.
#[derive(Clone, PartialEq, Eq, Hash)]
enum TagKind {
    /// Written as `<name>` or `<_>`.
    StartTag,
    /// Written as `</name>` or `</_>`.
    EndTag,
}
107
108// Option is None if wildcard
109#[derive(PartialEq, Eq, Hash, Clone)]
110struct Tag {
111 kind: TagKind,
112 name: Option<syn::Ident>,
113}
114
115impl Parse for Tag {
116 fn parse(input: ParseStream) -> Result<Self> {
117 input.parse::<Token![<]>()?;
118 let closing: Option<Token![/]> = input.parse()?;
119 let name: Option = match input.call(function:syn::Ident::parse_any)? {
120 ref wildcard: &Ident if wildcard == "_" => None,
121 other: Ident => Some(other),
122 };
123 input.parse::<Token![>]>()?;
124 Ok(Tag {
125 kind: if closing.is_some() {
126 TagKind::EndTag
127 } else {
128 TagKind::StartTag
129 },
130 name,
131 })
132 }
133}
134
135impl Parse for Lhs {
136 fn parse(input: ParseStream) -> Result<Self> {
137 if input.peek(Token![<]) {
138 let mut tags: Vec = Vec::new();
139 while !input.peek(Token![=>]) {
140 tags.push(input.parse()?);
141 }
142 Ok(Lhs::Tags(tags))
143 } else {
144 let p: Pat = input.call(function:syn::Pat::parse_single)?;
145 Ok(Lhs::Pattern(p))
146 }
147 }
148}
149
150impl Parse for MatchTokenArm {
151 fn parse(input: ParseStream) -> Result<Self> {
152 let binding = if input.peek2(Token![@]) {
153 let binding = input.parse::<syn::Ident>()?;
154 input.parse::<Token![@]>()?;
155 Some(binding)
156 } else {
157 None
158 };
159 let lhs = input.parse::<Lhs>()?;
160 input.parse::<Token![=>]>()?;
161 let rhs = if input.peek(syn::token::Brace) {
162 let block = input.parse::<syn::Block>().unwrap();
163 let block = syn::ExprBlock {
164 attrs: vec![],
165 label: None,
166 block,
167 };
168 input.parse::<Option<Token![,]>>()?;
169 Rhs::Expression(syn::Expr::Block(block))
170 } else if input.peek(Token![else]) {
171 input.parse::<Token![else]>()?;
172 input.parse::<Token![,]>()?;
173 Rhs::Else
174 } else {
175 let expr = input.parse::<syn::Expr>().unwrap();
176 input.parse::<Option<Token![,]>>()?;
177 Rhs::Expression(expr)
178 };
179
180 Ok(MatchTokenArm { binding, lhs, rhs })
181 }
182}
183
184impl Parse for MatchToken {
185 fn parse(input: ParseStream) -> Result<Self> {
186 let ident: Ident = input.parse::<syn::Ident>()?;
187 let content: ParseBuffer<'_>;
188 braced!(content in input);
189 let mut arms: Vec = vec![];
190 while !content.is_empty() {
191 arms.push(content.parse()?);
192 }
193 Ok(MatchToken { ident, arms })
194 }
195}
196
197fn expand_match_token_macro(match_token: MatchToken) -> proc_macro2::TokenStream {
198 let mut arms = match_token.arms;
199 let to_be_matched = match_token.ident;
200 // Handle the last arm specially at the end.
201 let last_arm = arms.pop().unwrap();
202
203 // Tags we've seen, used for detecting duplicates.
204 let mut seen_tags: HashSet<Tag> = HashSet::new();
205
206 // Case arms for wildcard matching. We collect these and
207 // emit them later.
208 let mut wildcards_patterns: Vec<proc_macro2::TokenStream> = Vec::new();
209 let mut wildcards_expressions: Vec<syn::Expr> = Vec::new();
210
211 // Tags excluded (by an 'else' RHS) from wildcard matching.
212 let mut wild_excluded_patterns: Vec<proc_macro2::TokenStream> = Vec::new();
213
214 let mut arms_code = Vec::new();
215
216 for MatchTokenArm { binding, lhs, rhs } in arms {
217 // Build Rust syntax for the `name @` binding, if any.
218 let binding = match binding {
219 Some(ident) => quote!(#ident @),
220 None => quote!(),
221 };
222
223 match (lhs, rhs) {
224 (Lhs::Pattern(_), Rhs::Else) => {
225 panic!("'else' may not appear with an ordinary pattern")
226 },
227
228 // ordinary pattern => expression
229 (Lhs::Pattern(pat), Rhs::Expression(expr)) => {
230 if !wildcards_patterns.is_empty() {
231 panic!("ordinary patterns may not appear after wildcard tags");
232 }
233 arms_code.push(quote!(#binding #pat => #expr,))
234 },
235
236 // <tag> <tag> ... => else
237 (Lhs::Tags(tags), Rhs::Else) => {
238 for tag in tags {
239 if !seen_tags.insert(tag.clone()) {
240 panic!("duplicate tag");
241 }
242 if tag.name.is_none() {
243 panic!("'else' may not appear with a wildcard tag");
244 }
245 wild_excluded_patterns
246 .push(make_tag_pattern(&proc_macro2::TokenStream::new(), tag));
247 }
248 },
249
250 // <_> => expression
251 // <tag> <tag> ... => expression
252 (Lhs::Tags(tags), Rhs::Expression(expr)) => {
253 // Is this arm a tag wildcard?
254 // `None` if we haven't processed the first tag yet.
255 let mut wildcard = None;
256 for tag in tags {
257 if !seen_tags.insert(tag.clone()) {
258 panic!("duplicate tag");
259 }
260
261 match tag.name {
262 // <tag>
263 Some(_) => {
264 if !wildcards_patterns.is_empty() {
265 panic!("specific tags may not appear after wildcard tags");
266 }
267
268 if wildcard == Some(true) {
269 panic!("wildcard tags must appear alone");
270 }
271
272 if wildcard.is_some() {
273 // Push the delimiter `|` if it's not the first tag.
274 arms_code.push(quote!( | ))
275 }
276 arms_code.push(make_tag_pattern(&binding, tag));
277
278 wildcard = Some(false);
279 },
280
281 // <_>
282 None => {
283 if wildcard.is_some() {
284 panic!("wildcard tags must appear alone");
285 }
286 wildcard = Some(true);
287 wildcards_patterns.push(make_tag_pattern(&binding, tag));
288 wildcards_expressions.push(expr.clone());
289 },
290 }
291 }
292
293 match wildcard {
294 None => panic!("[internal macro error] tag arm with no tags"),
295 Some(false) => arms_code.push(quote!( => #expr,)),
296 Some(true) => {}, // codegen for wildcards is deferred
297 }
298 },
299 }
300 }
301
302 // Time to process the last, catch-all arm. We will generate something like
303 //
304 // last_arm_token => {
305 // let enable_wildcards = match last_arm_token {
306 // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
307 // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
308 // // ...
309 // _ => true,
310 // };
311 //
312 // match (enable_wildcards, last_arm_token) {
313 // (true, TagToken(name @ Tag { kind: StartTag, .. }))
314 // => ..., // wildcard action for start tags
315 //
316 // (true, TagToken(name @ Tag { kind: EndTag, .. }))
317 // => ..., // wildcard action for end tags
318 //
319 // (_, token) => ... // using the pattern from that last arm
320 // }
321 // }
322
323 let MatchTokenArm { binding, lhs, rhs } = last_arm;
324
325 let (last_pat, last_expr) = match (binding, lhs, rhs) {
326 (Some(_), _, _) => panic!("the last arm cannot have an @-binding"),
327 (None, Lhs::Tags(_), _) => panic!("the last arm cannot have tag patterns"),
328 (None, _, Rhs::Else) => panic!("the last arm cannot use 'else'"),
329 (None, Lhs::Pattern(p), Rhs::Expression(e)) => (p, e),
330 };
331
332 quote! {
333 match #to_be_matched {
334 #(
335 #arms_code
336 )*
337 last_arm_token => {
338 let enable_wildcards = match last_arm_token {
339 #(
340 #wild_excluded_patterns => false,
341 )*
342 _ => true,
343 };
344 match (enable_wildcards, last_arm_token) {
345 #(
346 (true, #wildcards_patterns) => #wildcards_expressions,
347 )*
348 (_, #last_pat) => #last_expr,
349 }
350 }
351 }
352 }
353}
354
355fn make_tag_pattern(binding: &proc_macro2::TokenStream, tag: Tag) -> proc_macro2::TokenStream {
356 let kind: TokenStream = match tag.kind {
357 TagKind::StartTag => quote!(crate::tokenizer::StartTag),
358 TagKind::EndTag => quote!(crate::tokenizer::EndTag),
359 };
360 let name_field: TokenStream = if let Some(name: Ident) = tag.name {
361 let name: String = name.to_string();
362 quote!(name: local_name!(#name),)
363 } else {
364 quote!()
365 };
366 quote! {
367 crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. })
368 }
369}
370